#!/usr/bin/env perl
use warnings;
use strict;
$|++;
use Getopt::Long;
use Cwd;
use Carp;
use FindBin qw($RealBin);
use lib "$RealBin/../lib";

## This program is Copyright (C) 2010-20, Felix Krueger (felix.krueger@babraham.ac.uk)

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.

my @filenames; # input files
my %counting;
my $parent_dir = getcwd();

my %fhs;

my $version = 'v0.23.0';
my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_off,$mbias_only,$gazillion,$ample_mem,$ignore_3prime,$ignore_3prime_r2,$multicore,$yacht,$ucsc) = process_commandline();

### only needed for bedGraph output
my @sorting_files; # if files are to be written to bedGraph format, these are the methylation extractor output files
my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total
my @bedfiles;
my %chromosome_sizes;  # required for UCSC (--ucsc) bedGraph processing

### only needed for genome-wide cytosine methylation report
my %chromosomes;

my %mbias_1;
my %mbias_2;


##############################################################################################
### Summarising Run Parameters
##############################################################################################

### METHYLATION EXTRACTOR

warn "Summarising Bismark methylation extractor parameters:\n";
warn '='x63,"\n";

if ($single){
    warn "Bismark single-end SAM format specified (default)\n"; # default
}
elsif ($paired){
   warn "Bismark paired-end SAM format specified (default)\n"; # default
}

warn "Number of cores to be used: $multicore\n";

if ($single){
	if ($ignore){
		warn "First $ignore bp will be disregarded when processing the methylation call string\n";
	}
	if ($ignore_3prime){
		warn "Last $ignore_3prime bp will be disregarded when processing the methylation call string\n";
	}
}
else{ ## paired-end
	if ($ignore){
		warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\n";
	}
	if ($ignore_r2){
		warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\n";
	}

	if ($ignore_3prime){
		warn "Last $ignore_3prime bp will be disregarded when processing the methylation call string of Read 1\n";
	}
	if ($ignore_3prime_r2){
		warn "Last $ignore_3prime_r2 bp will be disregarded when processing the methylation call string of Read 2\n";
	}
}


if ($full){
	warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\n";
}
if ($merge_non_CpG){
	warn "Merge CHG and CHH context to non-CpG context specified\n";
}
### output directory
if ($output_dir eq ''){
	warn "Output will be written to the current directory ('$parent_dir')\n";
}
else{
	warn "Output path specified as: $output_dir\n";
}



### BEDGRAPH

if ($bedGraph){
	warn "\n\nSummarising bedGraph parameters:\n";
	warn '='x63,"\n";

	if ($counts){
		warn "Generating additional output in bedGraph and coverage format\nbedGraph format:\t<Chromosome> <Start Position> <End Position> <Methylation Percentage>\ncoverage format:\t<Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>\n\n";
	}
	else{
		warn "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\n";
	}

	### Zero-based coordinates
	if ($zero){
		warn "Writing out an additional coverage file (ending in zero.cov) with 0-based start and 1-based end genomic coordinates (zero-based, half-open; user-defined)\n";
	}

	warn "Using a cutoff of $coverage_threshold read(s) to report cytosine positions\n";

	if ($CX_context){
		warn "Reporting and sorting methylation information for all cytosine context (sorting may take a long time, you have been warned ...)\n";
	}
	else{ # default
		$CpG_only = 1;
		warn "Reporting and sorting cytosine methylation information in CpG context only (default)\n";
	}

	if ($remove){
		warn "White spaces in read ID names will be removed prior to sorting\n";
	}

	if ($ample_mem){
		warn "Sorting chromosomal postions for the bedGraph step using arrays instead of using UNIX sort\n";
	}
	elsif (defined $sort_size){
		warn "The bedGraph UNIX sort command will use the following memory setting:\t'$sort_size'. Temporary directory used for sorting is the output directory\n";
	}
	else{
		warn "Setting a default memory usage for the bedGraph UNIX sort command to 2GB\n";
	}

	if ($ucsc){
		warn "Writing out a UCSC compatible version of the bedGraph file. Data aligned to Ensembl genomes will have chromosome names prefixed with 'chr',\nand 'MT' replaced with 'chrM'\n";
		warn "In addition, this option writes out a tab-delimited genome-size file (extracted from the BAM header) to enable tools such as bedGraphToBigWig\n";
	}

	if ($cytosine_report){
		warn "\n\nSummarising genome-wide cytosine methylation report parameters:\n";
		warn '='x63,"\n";
		warn "Generating comprehensive genome-wide cytosine report\n(output format: <Chromosome> <Position> <Strand> <count methylated> <count non-methylated>  <C-context>  <trinucleotide context> )\n";

		if ($CX_context){
			warn "Reporting methylation for all cytosine contexts. Be aware that this will generate enormous files\n";
		}
		else{ # default
			$CpG_only = 1;
			warn "Reporting cytosine methylation in CpG context only (default)\n";
		}

		if ($split_by_chromosome){
			warn "Splitting the cytosine report output up into individual files for each chromosome\n";
		}

		### Zero-based coordinates
		if ($zero){
			warn "Using zero-based start and 1-based end genomic coordinates (zero-based, half-open; user-defined)\n";
		}
		else{ # default, 1-based coords
			warn "Using 1-based genomic coordinates (default)\n";
		}

		### GENOME folder
		if ($genome_folder){
			unless ($genome_folder =~/\/$/){
				$genome_folder =~ s/$/\//;
			}
			unless (-d $genome_folder){
				die "The specified genome folder does not appear to be a directory. Please respecify and try again!\n\n";
			}
			warn "Genome folder was specified as $genome_folder\n";
		}
		else{
			$genome_folder  = '/data/public/Genomes/Mouse/NCBIM37/';
			warn "Using the default genome folder /data/public/Genomes/Mouse/NCBIM37/\n";
		}
		sleep (1);
	}
}

warn "\n";

######################################################
### PROCESSING FILES
######################################################

foreach my $filename (@filenames){
	# resetting counters and filehandles
	%fhs = ();
	%counting =(
	    total_meCHG_count            => 0,
	    total_meCHH_count            => 0,
	    total_meCpG_count            => 0,
	    total_unmethylated_CHG_count => 0,
	    total_unmethylated_CHH_count => 0,
		total_unmethylated_CpG_count => 0,
	    sequences_count              => 0,
	    methylation_call_strings     => 0,
	    );

	@sorting_files = ();
	@bedfiles = ();

	%mbias_1 = ();
	%mbias_2 = ();

	# Testing if the file appears to be truncated, in which case we bail with a big scary warning message
	if ($filename =~ /(\.bam$)/){
		bam_isTruncated($filename);
	}
  
	### performing a quick check to see if a paired-end SAM file has been sorted by positions which does interfere with the logic used by the extractor
	if ($paired){
		test_positional_sorting($filename);
	}

	### Recording chromosome sizes
	if ($ucsc){
		%chromosome_sizes = (); # resetting
		get_chromosome_sizes($filename);

		my $chromosome_sizes_file = $filename;
		$chromosome_sizes_file =~ s/bam$//;
		$chromosome_sizes_file .= "chromosome_sizes.txt";
		open (SIZEABLE,">",$chromosome_sizes_file) or die "Failed to write to chromosome sizes file >$chromosome_sizes_file<: $!";
		warn "Writing out file with recorded chromosome sizes to $chromosome_sizes_file\n";
		foreach my $chr (sort keys %chromosome_sizes){
			print SIZEABLE "$chr\t$chromosome_sizes{$chr}\n";
		}
	}

	my ($pid,$pids,$report_basename) = process_Bismark_results_file($filename);
  
	if ($pid == 0){
		warn "Finished processing child process. Exiting..\n";
      
		#  ### Closing all filehandles of the child process so that the Bismark methylation extractor output doesn't get truncated due to buffering issues
		#     foreach my $fh (keys %fhs) {
		#       if ($fh =~ /^[1230]$/) {
		# 	foreach my $context (keys %{$fhs{$fh}}) {
		# 	  $fhs{$fh}->{$context}->flush;
		# 	}
		#       }
		#       else{
		# 	$fhs{$fh}->flush;
		#       }
		# }
		exit 0;
	}

	###
	if ($pid and $multicore > 1){
		warn "Now waiting for all child processes to complete\n";
		sleep(1);

		### we need to ensure that we wait for all child processes to be finished before continuing
		# warn "here are the child IDs: @$pids\n";
		# warn "Looping through the child process IDs:\n";

		foreach my $id (@$pids){
			# print "$id\t";
			my $kid = waitpid ($id,0);
			# print "Returned: $kid\nExit status: $?\n";
			unless ($? == 0){
				warn "\nChild process terminated with exit signal: '$?'\n\n";
			}
		}
	}

	### Closing all filehandles so that the Bismark methylation extractor output doesn't get truncated due to buffering issues
	foreach my $fh (keys %fhs) {
		if ($fh =~ /^[1230]$/) {
			foreach my $context (keys %{$fhs{$fh}}) {
				close $fhs{$fh}->{$context} or die $!;
			}
		}
		else{
			close $fhs{$fh} or die $!;
		}
	}

	### We need to stitch together a main splitting report from all individual parent/child processes
	if ($multicore > 1){
		merge_individual_splitting_reports($report_basename);
		print_splitting_report();
	
		merge_individual_mbias_reports($report_basename); # this updates the main %mbias_1 and  %mbias_2 data structures so we can proceed normally
	}

	unless ($mbias_off){
		### printing out all M-Bias data
		produce_mbias_plots ($filename);    produce_mbias_plots ($filename);
	}

	unless ($mbias_only){
		delete_unused_files();
	}

	if ($bedGraph){

		my $out = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
		$out =~ s/gz$//;
		$out =~ s/sam$//;
		$out =~ s/bam$//;
		$out =~ s/txt$//;
		$out =~ s/$/bedGraph/;

		my $bedGraph_output = $out;
		my @args;

		if ($remove){
			push @args, '--remove';
		}
		if ($CX_context){
			push @args, '--CX_context';
		}
		if ($no_header){
			push @args, '--no_header';
		}
		if ($gazillion){
			push @args, '--gazillion';
		}
		if ($ample_mem){
			push @args, '--ample_memory';
		}
		else{
			push @args, "--buffer_size $sort_size";
		}

		if ($ucsc){
			push @args, "--ucsc";
		}

		if ($zero){
			push @args, "--zero";
		}

		#   if ($counts){
		#      push @args, "--counts";
		#   }

		push @args, "--cutoff $coverage_threshold";
		push @args, "--output $bedGraph_output";
		push @args, "--dir '$output_dir'";

		### adding all files to be sorted to @args
		foreach my $f (@sorting_files){
		push @args, $f;
		}

		#  print join "\t",@args,"\n";

		system ("$RealBin/bismark2bedGraph @args");

		warn "Finished BedGraph conversion ...\n\n";
		sleep(1);

		# open (OUT,'>',$output_dir.$bedGraph_output) or die "Problems with the bedGraph output filename detected: file path: '$output_dir'\tfile name: '$bedGraph_output' $!";
		# warn "Writing bedGraph to file: $bedGraph_output\n";
		# process_bedGraph_output();
		# close OUT or die $!;

		### genome-wide cytosine methylation report requires bedGraph processing anyway
		if ($cytosine_report){

			@args = (); # resetting @args
			my $cytosine_out = $out;
			$cytosine_out =~ s/bedGraph$//;

			if ($CX_context){
				$cytosine_out =~ s/$/CX_report.txt/;
			}
			else{
				$cytosine_out =~ s/$/CpG_report.txt/;
			}

			push @args, "--output $cytosine_out";
			push @args, "--dir '$output_dir'";
			push @args, "--genome '$genome_folder'";
			push @args, "--parent_dir '$output_dir'"; # 21 July 2018: changing parent dir to $output_dir because this is the folder that will contain a coverage file 

			if ($zero){
				push @args, "--zero";
			}
			if ($CX_context){
				push @args, '--CX_context';
			}
			if ($split_by_chromosome){
				push @args, '--split_by_chromosome';
			}
			if ($gzip){
				push @args, '--gzip';
			}

			my $coverage_output = $bedGraph_output;
			$coverage_output =~ s/bedGraph$/bismark.cov.gz/;
      
			push @args, $coverage_output; # this will be the infile
			# warn "Handing over the following infile: $coverage_output\n"; sleep (5);
			system ("$RealBin/coverage2cytosine @args");
		
			warn "\n\nFinished generating genome-wide cytosine report\n\n";
		}
	}
}	

sub merge_individual_splitting_reports{

  my $report_basename = shift;

  my @splitting_reports; # only needed in multi-core mode to generate an overall report
  foreach my $ext (1..$multicore){
    push @splitting_reports, "$report_basename.$ext";
  }
  warn "\nMerging individual splitting reports into overall report: '$report_basename'\n";
  warn "Merging from these individual files:\n";
  print join ("\n",@splitting_reports),"\n\n";
  sleep(1);

  ##########
  # resetting the counter first
  %counting =(
	      total_meCHG_count            => 0,
	      total_meCHH_count            => 0,
	      total_meCpG_count            => 0,
	      total_unmethylated_CHG_count => 0,
	      total_unmethylated_CHH_count => 0,
	      total_unmethylated_CpG_count => 0,
	      sequences_count              => 0,
	      methylation_call_strings     => 0,
	     );

  # repopulating the merged counter
  foreach my $file (@splitting_reports){
    open (IR,$file) or die $!;
    while (<IR>){
      chomp;
      my ($context,$count) = (split /\t/);
      if ($context){
	if ($context =~ /^Total C to T conversions in CpG context/){
	  # warn "context: $context\ncount: $count\n";
	  $counting{total_unmethylated_CpG_count} += $count;
	}
	elsif ($context =~ /^Total C to T conversions in CHG context/){
	  # warn "context: $context\ncount: $count\n";
	  $counting{total_unmethylated_CHG_count} += $count;
	}
	elsif ($context =~ /^Total C to T conversions in CHH context/){
	  # warn "context: $context\ncount: $count\n";
	  $counting{total_unmethylated_CHH_count} += $count;
	}
	elsif ($context =~ /^Total methylated C\'s in CpG context/){
	  # warn "context: $context\ncount: $count\n";
	  $counting{total_meCpG_count} += $count;
	}
	elsif ($context =~ /^Total methylated C\'s in CHG context/){
	  # warn "context: $context\ncount: $count\n";
	  $counting{total_meCHG_count} += $count;
	}
	elsif ($context =~ /^Total methylated C\'s in CHH context/){
	  # warn "context: $context\ncount: $count\n";
	  $counting{total_meCHH_count} += $count;
	}
	elsif ($context =~ /^line count/){
	  # warn "Line count\ncount: $count\n";
	  $counting{sequences_count} = $count; # always the same
	}
	elsif ($context =~ /^meth call strings/){
	  # warn "Meth call strings\ncount: $count\n";
	  $counting{methylation_call_strings} += $count;
	}
      }
    }
  }

  # deleting the individual reports afterwards
  foreach my $file (@splitting_reports){
    unlink $file;
  }
}


sub merge_individual_mbias_reports{

  my $report_basename = shift;

  my @mbias_reports; # only needed in multi-core mode to generate an overall report
  foreach my $ext (1..$multicore){
    push @mbias_reports, "$report_basename.${ext}.mbias";
  }
  warn "\nMerging individual M-bias reports into overall M-bias statistics from these $multicore individual files:\n";
  print join ("\n",@mbias_reports),"\n\n";


  ##########
  # resetting the counters first, then repopulating them
  %mbias_1 = ();
  %mbias_2 = ();

  # repopulating the merged counter
  foreach my $file (@mbias_reports){
    open (IR,$file) or die $!;

    my $context;
    my $read;

    while (<IR>){
      chomp;
      # warn "$_\n"; sleep(1);
      if ($_ =~ /context/){
	$context = $1 if ($_ =~ /(\D{3}) context/);
	# warn "Context set as $context\n";
	
	if ($_ =~ /R2/){
	  $read = 'R2';
	}
	else{
	  $read = 'R1';
	}
	# warn "Setting read identity to '$read'\n";
	
	# reading in 2 additional lines (===========, and header line)
	$_ = <IR>;
	#warn "discarding line $_\n";
	$_ = <IR>;
	#warn "discarding line $_\n";
	next;
      }
      else{

	if ($_ eq ''){
	  # empty line, only occurs after a context has finished and before a new context starts
	  next;
	}

	my ($pos,$meth,$unmeth) = (split /\t/);
	# warn "$pos\t$meth\t$unmeth\n"; sleep(1);
	if ($read eq 'R1'){
	  $mbias_1{$context}->{$pos}->{meth} += $meth;
	  $mbias_1{$context}->{$pos}->{un} += $unmeth;
	}
	elsif ($read eq 'R2'){
	  $mbias_2{$context}->{$pos}->{meth} += $meth;
	  $mbias_2{$context}->{$pos}->{un} += $unmeth;
	}
      }
    }
    close IR or warn "Had trouble closing filehandle for $file: $!\n";
  }

  # deleting the individual reports afterwards
  foreach my $file (@mbias_reports){
    unlink $file;
  }
}


sub delete_unused_files{

	warn "Deleting unused files ...\n\n"; sleep(1);

	my $index = 0;

	while ($index <= $#sorting_files){
		if ($sorting_files[$index] =~ /gz$/){
			open (USED,"gunzip -c $sorting_files[$index] |") or die "Failed to read from methylation extractor output file $sorting_files[$index]: $!\n";
		}
		else{
			open (USED,$sorting_files[$index]) or die "Failed to read from methylation extractor output file $sorting_files[$index]: $!\n";
		}

		my $used = 0;

		while (<USED>){
			next if (/^Bismark/);
			if ($_){
			$used = 1;
			last;
		}
	}

    if ($used){
		warn "$sorting_files[$index] contains data ->\tkept\n";
		++$index;
    }
    else{

		my $delete = unlink $sorting_files[$index];

		if ($delete){
			warn "$sorting_files[$index] was empty ->\tdeleted\n";
		}
		else{
			warn "$sorting_files[$index] was empty, however deletion was unsuccessful: $!\n"
		}

		### we also need to remove the element from @sorting_files
		splice @sorting_files, $index, 1;
		}
	}
	warn "\n\n"; ## can't close the piped filehandles at this point because it will die (unfortunately)
}

sub produce_mbias_plots{

  my $filename = shift;

  my $mbias = (split (/\//,$filename))[-1]; # extracting the filename if a full path was specified
  $mbias =~ s/gz$//;
  $mbias =~ s/sam$//;
  $mbias =~ s/bam$//;
  $mbias =~ s/cram$//;
  $mbias =~ s/txt$//;
   my $mbias_graph_1 = my $mbias_graph_2 = $mbias;
  $mbias_graph_1 = $output_dir . $mbias_graph_1 . 'M-bias_R1.png';
  $mbias_graph_2 = $output_dir . $mbias_graph_2 . 'M-bias_R2.png';

  $mbias =~ s/$/M-bias.txt/;

  open (MBIAS,'>',"$output_dir$mbias") or die "Failed to open file for the M-bias data\n\n";

  # determining maximum read length
  my $max_length_1 = 0;
  my $max_length_2 = 0;

  foreach my $context (keys %mbias_1){
    foreach my $pos (sort {$a<=>$b} keys %{$mbias_1{$context}}){
      $max_length_1 = $pos unless ($max_length_1 >= $pos);
    }
  }
  if ($paired){
    foreach my $context (keys %mbias_2){
      foreach my $pos (sort {$a<=>$b} keys %{$mbias_2{$context}}){
	$max_length_2 = $pos unless ($max_length_2 >= $pos);
      }
    }
  }

  if ($single){
    warn "Determining maximum read length for M-Bias plot\n";
    warn "Maximum read length of Read 1: $max_length_1\n\n";
  }
  else{
    warn "Determining maximum read lengths for M-Bias plots\n";
    warn "Maximum read length of Read 1: $max_length_1\n";
    warn "Maximum read length of Read 2: $max_length_2\n\n";
  }
  # sleep(3);

  my @mbias_read1;
  my @mbias_read2;

  #Check whether the module GD::Graph:lines is installed
  my $gd_graph_installed = 0;
  eval{
    require GD::Graph::lines;
    GD::Graph::lines->import();
  };

  unless($@) { # syntax or routine error variable, set if something goes wrong in the last eval{ require ...}
    $gd_graph_installed = 1;

    #Check whether the module GD::Graph::colour is installed
    eval{
      require GD::Graph::colour;
      GD::Graph::colour->import(qw(:colours :lists :files :convert));
    };

    if ($@) {
      warn "Perl module GD::Graph::colour not found, skipping drawing M-bias plots (only writing out M-bias plot table)\n";
      sleep(2);
      $gd_graph_installed = 0;
    }


  }
  else{
    warn "Perl module GD::Graph::lines is not installed, skipping drawing M-bias plots (only writing out M-bias plot table)\n";
    sleep(2);
  }


  my $graph_title;
  my $graph1;
  my $graph2;

  if ( $gd_graph_installed){
    $graph1 = GD::Graph::lines->new(800,600);
    if ($paired){
      $graph2 = GD::Graph::lines->new(800,600);
    }
  }

  foreach my $context (qw(CpG CHG CHH)){
    @{$mbias_read1[0]} = ();

    if ($paired){
      print MBIAS "$context context (R1)\n================\n";
      $graph_title = 'M-bias (Read 1)';
    }
    else{
      print MBIAS "$context context\n===========\n";
      $graph_title = 'M-bias';
    }
    print MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";

    foreach my $pos (1..$max_length_1){

      unless (defined $mbias_1{$context}->{$pos}->{meth}){
	$mbias_1{$context}->{$pos}->{meth} = 0;
      }
      unless (defined $mbias_1{$context}->{$pos}->{un}){
	$mbias_1{$context}->{$pos}->{un} = 0;
      }

      my $percent = '';
      if (($mbias_1{$context}->{$pos}->{meth} + $mbias_1{$context}->{$pos}->{un}) > 0){
	$percent = sprintf("%.2f",$mbias_1{$context}->{$pos}->{meth} * 100/ ( $mbias_1{$context}->{$pos}->{meth} + $mbias_1{$context}->{$pos}->{un}) );
      }
      my $coverage = $mbias_1{$context}->{$pos}->{un} + $mbias_1{$context}->{$pos}->{meth};

      print MBIAS "$pos\t$mbias_1{$context}->{$pos}->{meth}\t$mbias_1{$context}->{$pos}->{un}\t$percent\t$coverage\n";
      push @{$mbias_read1[0]},$pos;

      if ($context eq 'CpG'){
	push @{$mbias_read1[1]},$percent;
	push @{$mbias_read1[4]},$coverage;
      }
      elsif ($context eq 'CHG'){
	push @{$mbias_read1[2]},$percent;
	push @{$mbias_read1[5]},$coverage;
      }
      elsif ($context eq 'CHH'){
    	push @{$mbias_read1[3]},$percent;
	push @{$mbias_read1[6]},$coverage;
      }
    }
    print MBIAS "\n";
  }

  if ( $gd_graph_installed){

    add_colour(nice_blue => [31,120,180]);
    add_colour(nice_orange => [255,127,0]);
    add_colour(nice_green => [51,160,44]);
    add_colour(pale_blue => [153,206,227]);
    add_colour(pale_orange => [253,204,138]);
    add_colour(pale_green => [191,230,207]);

    $graph1->set(
		 x_label              => 'position (bp)',
		 y1_label              => '% methylation',
		 y2_label              => '# methylation calls',
		 title                => $graph_title,
		 line_width           => 2,
		 x_max_value          => $max_length_1,
		 x_min_value          => 0,
		 y_tick_number        => 10,
		 y_label_skip         => 2,
		 y1_max_value          => 100,
		 y1_min_value          => 0,
		 y_label_skip         => 2,
		 y2_min_value          => 0,
		 x_label_skip         => 5,
		 x_label_position     => 0.5,
		 x_tick_offset        => -1,
		 bgclr                => 'white',
		 transparent          => 0,
		 two_axes             => 1,
		 use_axis             => [1,1,1,2,2,2],
		 legend_placement     => 'RC',
		 legend_spacing       => 6,
		 legend_marker_width  => 24,
		 legend_marker_height => 18,
		 dclrs              => [ qw(nice_blue nice_orange nice_green pale_blue pale_orange pale_green)],
		) or die $graph1->error;

    $graph1->set_legend('CpG methylation','CHG methylation','CHH methylation','CpG total calls','CHG total calls','CHH total calls');

    ### Failure to plot the MBIAS graph will now generate a warning instead of dieing (previous version below. Suggested by Andrew DeiRossi, 05 June 2014)
    if (my $gd1 = $graph1->plot(\@mbias_read1)) {
      open (MBIAS_G1,'>',$mbias_graph_1) or die "Failed to write to file for M-bias plot 1: $!\n\n";
      binmode MBIAS_G1;
      print MBIAS_G1 $gd1->png;
    }
    else {
      warn "WARNING: Cannot generate read 1 M-bias plot: " , $graph1->error  , "\n\n";
    }

    #     my $gd1 = $graph1->plot(\@mbias_read1) or die $graph1->error;
    #     open (MBIAS_G1,'>',$mbias_graph_1) or die "Failed to write to file for M-bias plot 1: $!\n\n";
    #     binmode MBIAS_G1;
    #     print MBIAS_G1 $gd1->png;
  }

  if ($paired){

    foreach my $context (qw(CpG CHG CHH)){
      @{$mbias_read2[0]} = ();

      print MBIAS "$context context (R2)\n================\n";
      print MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";
      foreach my $pos (1..$max_length_2){
	
	unless (defined $mbias_2{$context}->{$pos}->{meth}){
	  $mbias_2{$context}->{$pos}->{meth} = 0;
	}
	unless (defined $mbias_2{$context}->{$pos}->{un}){
	  $mbias_2{$context}->{$pos}->{un} = 0;
	}

	my $percent = '';
	if (($mbias_2{$context}->{$pos}->{meth} + $mbias_2{$context}->{$pos}->{un}) > 0){
	  $percent = sprintf("%.2f",$mbias_2{$context}->{$pos}->{meth} * 100/ ($mbias_2{$context}->{$pos}->{meth} + $mbias_2{$context}->{$pos}->{un}) );
	}
	my $coverage = $mbias_2{$context}->{$pos}->{un} + $mbias_2{$context}->{$pos}->{meth};

	print MBIAS "$pos\t$mbias_2{$context}->{$pos}->{meth}\t$mbias_2{$context}->{$pos}->{un}\t$percent\t$coverage\n";
	
	push @{$mbias_read2[0]},$pos;
	
	if ($context eq 'CpG'){
	  push @{$mbias_read2[1]},$percent;
	  push @{$mbias_read2[4]},$coverage;
	}
	elsif ($context eq 'CHG'){
	  push @{$mbias_read2[2]},$percent;
	  push @{$mbias_read2[5]},$coverage;
	}
	elsif ($context eq 'CHH'){
	  push @{$mbias_read2[3]},$percent;
	  push @{$mbias_read2[6]},$coverage;
	}
      }	
      print MBIAS "\n";
    }

    if ( $gd_graph_installed){

      add_colour(nice_blue => [31,120,180]);
      add_colour(nice_orange => [255,127,0]);
      add_colour(nice_green => [51,160,44]);
      add_colour(pale_blue => [153,206,227]);
      add_colour(pale_orange => [253,204,138]);
      add_colour(pale_green => [191,230,207]);

      $graph2->set(
		   x_label              => 'position (bp)',
		   line_width           => 2,
		   x_max_value          => $max_length_1,
		   x_min_value          => 0,
		   y_tick_number        => 10,
		   y_label_skip         => 2,
		   y1_max_value          => 100,
		   y1_min_value          => 0,
		   y_label_skip         => 2,
		   y2_min_value          => 0,
		   x_label_skip         => 5,
		   x_label_position     => 0.5,
		   x_tick_offset        => -1,
		   bgclr                => 'white',
		   transparent          => 0,
		   two_axes             => 1,
		   use_axis             => [1,1,1,2,2,2],
		   legend_placement     => 'RC',
		   legend_spacing       => 6,
		   legend_marker_width  => 24,
		   legend_marker_height => 18,
		   dclrs                => [ qw(nice_blue nice_orange nice_green pale_blue pale_orange pale_green)],
		   x_label              => 'position (bp)',
		   y1_label             => '% methylation',
		   y2_label             => '# calls',
		   title                => 'M-bias (Read 2)',
		  ) or die $graph2->error;

      $graph2->set_legend('CpG methylation','CHG methylation','CHH methylation','CpG total calls','CHG total calls','CHH total calls');

      ### Failure to plot the MBIAS graph will now generate a warning instead of dieing (previous version below. Suggested by Andrew DeiRossi, 05 June 2014)
      if (my $gd2 = $graph2->plot(\@mbias_read2)) {
        open (MBIAS_G2,'>',$mbias_graph_2) or die "Failed to write to file for M-bias plot 2: $!\n\n";
        binmode MBIAS_G2;
        print MBIAS_G2 $gd2->png;
      }
      else {
        warn "WARNING: Cannot generate Read 2 M-bias plot: " , $graph2->error , "\n\n";
      }

      # my $gd2 = $graph2->plot(\@mbias_read2) or die $graph2->error;
      # open (MBIAS_G2,'>',$mbias_graph_2) or die "Failed to write to file for M-bias plot 2: $!\n\n";
      # binmode MBIAS_G2;
      # print MBIAS_G2 $gd2->png;

    }
  }
}

sub process_commandline{
	my $help;
	my $single_end;
	my $paired_end;
	my $ignore;
	my $ignore_r2;
	my $genomic_fasta;
	my $full;
	my $report;
	my $extractor_version;
	my $no_overlap;
	my $merge_non_CpG;
	my $output_dir;
	my $no_header;
	my $bedGraph;
	my $coverage_threshold = 1; # Minimum number of reads covering before calling methylation status
	my $remove;
	my $counts;
	my $cytosine_report;
	my $genome_folder;
	my $zero;
	my $CpG_only;
	my $CX_context;
	my $split_by_chromosome;
	my $sort_size;
	my $samtools_path;
	my $gzip;
	my $mbias_only;
	my $mbias_off;
	my $gazillion;
	my $ample_mem;
	my $include_overlap;
	my $ignore_3prime;
	my $ignore_3prime_r2;
	my $multicore;
	my $yacht;
	my $ucsc;
  
	my $command_line = GetOptions ('help|man'             => \$help,
				 'p|paired-end'         => \$paired_end,
				 's|single-end'         => \$single_end,
				 'fasta'                => \$genomic_fasta,
				 'ignore=i'             => \$ignore,
				 'ignore_r2=i'          => \$ignore_r2,
				 'comprehensive'        => \$full,
				 'report'               => \$report,
				 'version'              => \$extractor_version,
				 'no_overlap'           => \$no_overlap,
				 'merge_non_CpG'        => \$merge_non_CpG,
				 'o|output=s'           => \$output_dir,
				 'no_header'            => \$no_header,
				 'bedGraph'             => \$bedGraph,
				 "cutoff=i"             => \$coverage_threshold,
				 "remove_spaces"        => \$remove,
				 "counts"               => \$counts,
				 "cytosine_report"      => \$cytosine_report,
				 'g|genome_folder=s'    => \$genome_folder,
				 "zero_based"           => \$zero,	
				 "CX|CX_context"        => \$CX_context,
				 "split_by_chromosome"  => \$split_by_chromosome,
				 "buffer_size=s"        => \$sort_size,
				 'samtools_path=s'      => \$samtools_path,
				 'gzip'                 => \$gzip,
				 'mbias_only'           => \$mbias_only,			
				 'mbias_off'            => \$mbias_off,			
				 'gazillion|scaffolds'  => \$gazillion,
				 'ample_memory'         => \$ample_mem,
				 'include_overlap'      => \$include_overlap,
				 'ignore_3prime=i'      => \$ignore_3prime,
				 'ignore_3prime_r2=i'   => \$ignore_3prime_r2,
				 'parallel|multicore=i' => \$multicore,
				 'yacht'                => \$yacht,
				 'ucsc'                 => \$ucsc,
	);

	### EXIT ON ERROR if there were errors with any of the supplied options
	unless ($command_line){
		die "Please respecify command line options\n";
	}

	### HELPFILE
	if ($help){
		print_helpfile();
		exit;
	}

	if ($extractor_version){
		print << "VERSION";


                           Bismark Methylation Extractor

                         Bismark Extractor Version: $version
              Copyright 2010-20 Felix Krueger, Babraham Bioinformatics
                www.bioinformatics.babraham.ac.uk/projects/bismark/
                     https://github.com/FelixKrueger/Bismark


VERSION
		exit;
	}


	### no files provided
	unless (@ARGV){
		die "You need to provide one or more Bismark files to create an individual C methylation output. Please respecify!\n";
	}
	@filenames = @ARGV;

	warn "\n *** Bismark methylation extractor version $version ***\n\n";

	### M-BIAS ONLY
	if ($mbias_only){

		if ($mbias_off){
			die "Options '--mbias_only' and '--mbias_off' are not compatible. Just pick one, mkay?\n";
		}
		if ($bedGraph){
		  die "Option '--mbias_only' skips all sorts of methylation extraction, including the bedGraph generation. Please respecify!\n";
		}
		if ($cytosine_report){
		  die "Option '--mbias_only' skips all sorts of methylation extraction, including the genome-wide cytosine methylation report generation. Please respecify!\n";
		}
		if ($merge_non_CpG){
		  warn "Option '--mbias_only' skips all sorts of methylation extraction, thus '--merge' won't have any effect\n";
		}
		if ($full){
		  warn "Option '--mbias_only' skips all sorts of methylation extraction, thus '--comprehensive' won't have any effect\n";
		}
		sleep(3);
	}

	### PRINT A REPORT
	unless ($report){
		$report = 1; # making this the default
	}

	## OUTPUT DIR PATH
	if (defined $output_dir){
		unless ($output_dir =~ /\/$/){
			$output_dir =~ s/$/\//;
		}	
      
		# CRATING THE OUTPUT DIRECTORY IF NECESSARY
		if (chdir $output_dir){
			$output_dir = getcwd(); #  making the path absolute
			unless ($output_dir =~ /\/$/){
				$output_dir =~ s/$/\//;
			}
		}
		else{
			mkdir $output_dir or die "Unable to create directory $output_dir $!\n";
			warn "Output directory $output_dir doesn't exist, creating it for you...\n\n"; sleep(1);
			chdir $output_dir or die "Failed to move to $output_dir\n";
			$output_dir = getcwd(); #  making the path absolute
			unless ($output_dir =~ /\/$/){
				$output_dir =~ s/$/\//;
			}
		}
		warn "Output will be written into the directory: $output_dir\n";
	}
	else{
		$output_dir = '';
	}
	chdir $parent_dir or die "Failed to change back to the parent directory: $!";


	### NO HEADER
	unless ($no_header){
		$no_header = 0;
	}
	
	### PATH TO SAMTOOLS
	if (defined $samtools_path){
		# if Samtools was specified as full command
		if ($samtools_path =~ /samtools$/){
			if (-e $samtools_path){
				# Samtools executable found
			}
			else{
				die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
			}
		}
		else{
			unless ($samtools_path =~ /\/$/){
				$samtools_path =~ s/$/\//;
			}
			$samtools_path .= 'samtools';
			if (-e $samtools_path){
				# Samtools executable found
			}
			else{
				die "Could not find an installation of Samtools at the location $samtools_path. Please respecify\n";
			}
		}
	}
	# Check whether Samtools is in the PATH if no path was supplied by the user
	else{
		if (!system "which samtools >/dev/null 2>&1"){ # STDOUT is binned, STDERR is redirected to STDOUT. Returns 0 if Samtools is in the PATH
			$samtools_path = `which samtools`;
			chomp $samtools_path;
		}
	}

	unless (defined $samtools_path){
		die "No SAMtools installation found. Please add SAMtools to the PATH or specify its location with --samtools_path [path]\n";
	}
	

	if ($single_end){
		$paired_end = 0;   ### SINGLE END ALIGNMENTS
	}
	elsif ($paired_end){
		$single_end = 0;   ### PAIRED-END ALIGNMENTS
	}
	else{
		### we will try to determine whether the input file was a single-end or paired-end sequencing run from the SAM header
		# SAM/BAM format
		my $file = $filenames[0];

		warn "Trying to determine the type of mapping from the SAM header line of file $file\n"; sleep(1);
	
		unless (-e $file){
			die "\n[FATAL ERROR]:\tFile >>$file<< did not exist. Please re-specify file names and try again...\n\n";
		}

		### if the user did not specify whether the alignment file was single-end or paired-end we are trying to get this information from the @PG header line in the SAM/BAM file
		if ($file =~ /\.gz$/){
			open (DETERMINE,"gunzip -c $file |") or die "Unable to read from gzipped file $file: $!\n";
		}
		elsif ($file =~ /\.cram$/ || $file =~ /\.bam$/ || isBam($file) ){ ### this would allow to read BAM files that do not end in *.bam
			open (DETERMINE,"$samtools_path view -h $file |") or die "Unable to read from BAM/CRAM file $file: $!\n";
		}
		else{
			open (DETERMINE,$file) or die "Unable to read from $file: $!\n";
		}
	
		while (<DETERMINE>){
			last unless (/^\@/);
			if ($_ =~ /^\@PG/){
				# warn "found the \@PG line:\n";
				# warn "$_";

				if ($_ =~ /ID:Bismark/){
					# warn "This is the Bismark command we are after. Extracting the library type...\n"; sleep(1);
				}
				else{
					# warn "This isn't the Bismark \@PG line. Moving on...\n"; sleep(1);
					next;
				}

				if ($_ =~ /\s+--?1\s+/ and $_ =~ /\s+--?2\s+/){ # allowing -1 and -2 or --1  and --2 (or a combination thereof). 15 09 2016
					warn "Treating file(s) as paired-end data (as extracted from \@PG line)\n\n"; sleep(1);
					$paired_end = 1;
					$single_end = 0;
				}
				else{
					warn "Treating file(s) as single-end data (as extracted from \@PG line)\n\n"; sleep(1);
					$paired_end = 0;
					$single_end = 1;
				}
			}
		}		
		# close DETERMINE or warn $!; # this always throws an error anyway...
    }
  

	### IGNORING <INT> 5' END 
	# bases at the start of the read when processing the methylation call string
	unless ($ignore){
		$ignore = 0;
	}

	if (defined $ignore_r2){
		die "You can only specify --ignore_r2 for paired-end result files\n" unless ($paired_end);
	}
	else{
		$ignore_r2 = 0;
	}

	### IGNORING <INT> 3' END
	# bases at the end of the read when processing the methylation call string
	unless ($ignore_3prime){
		$ignore_3prime = 0;
	}

	if (defined $ignore_3prime_r2){
		die "You can only specify --ignore_3prime_r2 for paired-end result files\n" unless ($paired_end);
	}
	else{
		$ignore_3prime_r2 = 0;
	}


	### NO OVERLAP
	### --no_overlap is the default (as of version 0.12.6), unless someone explicitly asks to include overlaps
	if ($include_overlap){
		die "The option '--include_overlap' can only be specified for paired-end input!\n" unless ($paired_end);
		warn "Setting option '--inlcude_overlap' for paired-end data (user-defined)\n\n";
		$no_overlap = 0;
	}
	else{ # default
		if ($paired_end){
		warn "Setting option '--no_overlap' since this is (normally) the right thing to do for paired-end data\n\n";
		$no_overlap = 1;
		}
	}

	### COMPREHENSIVE OUTPUT
	unless ($full){
		$full = 0;
	}	

	### MERGE NON-CpG context
	unless ($merge_non_CpG){
		$merge_non_CpG = 0;
	}

	### remove white spaces in read ID (needed for sorting using the sort command
	unless ($remove){
		$remove = 0;
	}

	### COVERAGE THRESHOLD FOR bedGraph OUTPUT
	if (defined $coverage_threshold){
		unless ($coverage_threshold > 0){
			die "Please select a coverage greater than 0 (positive integers only)\n";
		}	
	}
	else{
		$coverage_threshold = 1;
	}

 
	if ($zero){
		die "Option '--zero' is only available if  '--bedGraph' or '--cytosine_report' are specified as well. Please respecify\n" unless ($cytosine_report or $bedGraph);
	}

	if ($CX_context){
		die "Option '--CX_context' is only available if  '--bedGraph' or '--cytosine_report' are specified as well. Please respecify\n" unless ($cytosine_report or $bedGraph);
	}
	else{
		$CX_context = 0;
	}	

	unless ($counts){
		$counts = 1; # counts will always be set
	}

	if ($cytosine_report){
	
		### GENOME folder
		if ($genome_folder){
			unless ($genome_folder =~/\/$/){
				$genome_folder =~ s/$/\//;
			}
		}
		else{
			die "Please specify a genome folder to proceed (full path only)\n";
		}

		unless ($bedGraph){
			warn "Setting the option '--bedGraph' since this is required for the genome-wide cytosine report\n";
			$bedGraph = 1;
		}
		unless ($counts){
			# warn "Setting the option '--counts' since this is required for the genome-wide cytosine report\n";
			$counts = 1;
		}
		warn "\n";
	}

 
	if ($ample_mem){
		if (defined $sort_size){
			die "The options '--ample_mem' and '--buffer_size' are mutually exclusive. Please pick just one and try again.\n\n";
		}
	}
 
	### SORT buffer size
	if (defined $sort_size){
		unless ($sort_size =~ /^\d+\%$/ or $sort_size =~ /^\d+(K|M|G|T)$/){
			die "Please select a buffer size as percentage (e.g. --buffer_size 20%) or a number to be multiplied with K, M, G, T etc. (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line\n";
		}
	}
	else{
		$sort_size = '2G';
	}
 
	if ($gazillion){
		if ($ample_mem){
			die "You can't currently select '--ample_mem' together with '--gazillion'. Make your pick!\n\n";
		}
	}

	if (defined $multicore){
		unless ($multicore > 0){
			die "Core usage needs to be set to 1 or more (currently selected $multicore). Please respecify!\n";
		}
		if ($multicore > 20){
			warn "Core usage currently set to more than 20 threads. Let's see how this goes... (set value: $multicore)\n\n";
		}
	}
	else{
		$multicore = 1; # default. Single-thread mode
		warn "Setting core usage to single-threaded (default). Consider using --multicore <int> to speed up the extraction process.\n\n";
	}

	# --yacht will produce a single output file (any_C_context_...) that will merge information from both CpG and non-CG contexts.
	if ($yacht){
		# setting --comprehensive option
		$full = 1;
		# setting --merge_non_CpG option
		$merge_non_CpG = 1;
		# single-end only working with single-cell, single-end non-directional data at the moment only 06 04 2017
		die "The option '--yacht' requires single end files! Please respecify!\n\n" unless ($single_end);
		die "The option '--yacht' does not work together with '--mbias_only'! Please respecify!\n\n" if ($mbias_only);
	}
  
	if ($ucsc){
    	warn "Creating an additional bedGraph file that has known Ensembl chromosome names replaced to work with the UCSC genome browser\n";
    	warn "This option:\n- prefixes chromosome names with 'chr'\n";
    	warn "- changes 'MT' to 'chrM'\n\n";
	}

	return ($ignore,$genomic_fasta,$single_end,$paired_end,$full,$report,$no_overlap,$merge_non_CpG,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_off,$mbias_only,$gazillion,$ample_mem,$ignore_3prime,$ignore_3prime_r2,$multicore,$yacht,$ucsc);
}

sub bam_isTruncated{
    
    my ($file) = @_;
    warn "Checking file >>$file<< for signs of file truncation...\n";
    
    open (TRUNC,"$samtools_path view 2>&1 $file |") or die "Unable to read from BAM file $file: $!\n"; # 2>&1 redirects stderr to stdout, so we should be able to capture problems
    
    my $count = 0;
    while (<TRUNC>){
		chomp;
		++$count;
		# errors tend to start with a [], I have seen e.g.: 
		# [W::bam_hdr_read] EOF marker is absent. The input is probably truncated.
		if ($_ =~ /^\[/){ 
			if ($_ =~ /[EOF|truncated]/){ 
			die "Captured error message: '$_'\n\n[ERROR] The file appears to be truncated, please ensure that there were no errors while copying the file!!! Exiting...\n\n";
			}
		}
		last if ($count == 10); 	# this should be enough
    }
    close TRUNC or warn "$!\n";
}

sub test_positional_sorting{

	my $filename = shift;

	print "\nNow testing Bismark result file >$filename< for positional sorting (which would be bad...)\t";
	sleep(1);
  
	if ($filename =~ /\.gz$/) {
		open (TEST,"gunzip -c $filename |") or die "Can't open gzipped file $filename: $!\n";
	}
	elsif ($filename =~ /bam$/ || $filename =~ /cram$/ || isBam($filename) ){ ### this would allow to read BAM files that do not end in *.bam
		if ($samtools_path){
			open (TEST,"$samtools_path view -h $filename |") or die "Can't open BAM/CRAM file $filename: $!\n";
		}
		else{
			die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
		}
	}
	else {
		open (TEST,$filename) or die "Can't open file $filename: $!\n";
	}

	my $count = 0;

	while (<TEST>) {
		if (/^\@/) {	     # testing header lines if they contain the @SO flag (for being sorted)
			if (/^\@SO/) {
				die "SAM/BAM header line '$_' indicates that the Bismark aligment file has been sorted by chromosomal positions which is is incompatible with correct methylation extraction. Please use an unsorted file instead\n\n";
			}
			next;
		}
		$count++;

		last if ($count > 100000); # else we test the first 100000 sequences if they start with the same read ID

		my ($id_1) = (split (/\t/));

		### reading the next line which should be read 2
		$_ = <TEST>;
		my ($id_2) = (split (/\t/));
		last unless ($id_2);
		++$count;

		if ($id_1 eq $id_2){
			### ids are the same
			next;
		}
		else{ ### in previous versions of Bismark we appended /1 and /2 to the read IDs for easier eyeballing which read is which. These tags need to be removed first
			my $id_1_trunc = $id_1;
			$id_1_trunc =~ s/\/1$//;
			my $id_2_trunc = $id_2;
			$id_2_trunc =~ s/\/2$//;
	
			unless ($id_1_trunc eq $id_2_trunc){
				die "\nThe IDs of Read 1 ($id_1) and Read 2 ($id_2) are not the same. This might be the result of sorting the paired-end SAM/BAM files by chromosomal position which is not compatible with correct methylation extraction. Please use an unsorted file instead or sort the file using 'samtools sort -n' (by read name). This may also occur using samtools merge as it does not guarantee the read order. To properly merge files please use 'samtools merge -n' or 'samtools cat'.\n\n";
			}
		}
	}
	#  close TEST or die $!; somehow fails on our cluster...
	### If it hasen't died so far then it seems the file is in the correct Bismark format (read 1 and read 2 of a pair directly following each other)
	warn "...passed!\n";
	sleep(1);

}

sub process_Bismark_results_file{

	my $filename = shift;
	my $report_filename = open_output_filehandles($filename);

	### disabling buffering so we don't run into problems with half written out lines...
	foreach my $fh (keys %fhs){
		if ($fh =~ /^[1230]$/) {
			foreach my $context (keys %{$fhs{$fh}}) {
				select($fhs{$fh}->{$context});
				$|++;
			}
		}
		else{
			select($fhs{$fh});
			$|++;
		}
	}
	select(STDOUT);

	################################################
	################################################
	### multi-process handling
	###

	my $offset = 1;
	my $process_id;
	my @pids;
	if ($multicore > 1){

		until ($offset == $multicore){
			# warn "multicore: $multicore\noffset: $offset\n";
			my $fork = fork;

			if (defined $fork){
				if ($fork != 0){
					$process_id = $fork;
					push @pids, $process_id;
					if ($offset < $multicore){
						++$offset;
						# warn "I am the parent process, child pid: $fork\nIncrementing offset counter to: $offset\n\n";
					}
					else{
						# warn "Reached the number of maximum multicores. Proceeeding to processing...\n";
					}
				}
				elsif ($fork == 0){
					# warn "I am a child process, pid: $fork\nOffset counter is: $offset\nProceeding to processing...\n";
					$process_id = $fork;
					last;
				}
			}
			else{
				die "Forking unsuccessful. Proceeding using a single thread only\n";
			}
		}

		# warn "\nThe thread identity\n===================\n";
		if ($process_id){
			# print "I am the parent process. My children are called:\n";
			# print join ("\t",@pids),"\n";
			# print "I am going to process the following line count: $offset\n\n";
		}
		elsif($process_id == 0){
			# warn "I am a child process: Process ID: $process_id\n";
			# warn "I am going to process the following line count: $offset\n\n";
		}
		else{
			die "Process ID was: $process_id\n";
		}
	}
	else{
		# warn "Single-core mode: setting pid to 1\n";
		$process_id = 1;
	}

	################################################
	################################################
	if ($process_id){
		warn "Now reading in Bismark result file $filename\n";
	}
	else{
		warn "\nNow reading in Bismark result file $filename\n\n";
	}

	if ($filename =~ /\.gz$/) {
		open (IN,"gunzip -c $filename |") or die "Can't open gzipped file $filename: $!\n";
	}
	elsif ($filename =~ /bam$/ || $filename =~ /cram$/ || isBam($filename) ){ ### this would allow to read BAM files that do not end in *.bam
		if ($samtools_path){
			open (IN,"$samtools_path view -h $filename |") or die "Can't open BAM/CRAM file $filename: $!\n";
		}
		else{
			die "Sorry couldn't find an installation of Samtools. Either specifiy an alternative path using the option '--samtools_path /your/path/', or use a SAM file instead\n\n";
		}
	}
	else {
		open (IN,$filename) or die "Can't open file $filename: $!\n";
	}
	
	my $methylation_call_strings_processed = 0;
	my $line_count = 0;

	### proceeding differently now for single-end or paired-end Bismark files

	### PROCESSING SINGLE-END RESULT FILES
	if ($single) {
		# processing single-end SAM format (default)
		while (<IN>) {
			chomp;
			### SAM format can either start with header lines (starting with @) or start with alignments directly
			if (/^\@/) {	     # skipping header lines (starting with @)
				warn "skipping SAM header line:\t$_\n" unless ($process_id == 0);
				next;
			}
			++$line_count;
			#	warn "$line_count\n";
			warn "Processed lines: $line_count\n" if ($line_count%500000 == 0);

			if ( ($line_count - $offset)%$multicore == 0){
				# warn "line count: $line_count\noffset: $offset\n";
				# warn "Modulus: ",($line_count - $offset)%$multicore,"\n";
				# warn "processing this line $line_count (processID: $process_id with \$offset $offset)\n";
			}
			else{
				# warn "skipping line $line_count for processID: $process_id with \$offset $offset)\n";
				next;
			}
	
			# example read in SAM format
			# 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
			###

			# < 0.7.6 my ($id,$chrom,$start,$meth_call,$read_conversion,$genome_conversion) = (split("\t"))[0,2,3,13,14,15];
			# < 0.7.6 $meth_call =~ s/^XM:Z://;
			# < 0.7.6 $read_conversion =~ s/^XR:Z://;
			# < 0.7.6 $genome_conversion =~ s/^XG:Z://;	

			my ($id,$chrom,$start,$cigar) = (split("\t"))[0,2,3,5];

			### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
			my $meth_call;	  ### Thanks to Zachary Zeno for this solution
			my $read_conversion;
			my $genome_conversion;
			
			while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
				my $tag = $1;
				my $value = $2;

				if ($tag eq "XM") {
					$meth_call = $value;
					$meth_call =~ s/\r//;
				} elsif ($tag eq "XR") {
					$read_conversion = $value;
					$read_conversion =~ s/\r//;
				} elsif ($tag eq "XG") {
					$genome_conversion = $value;
					$genome_conversion =~ s/\r//;
				}
			}		

			my $strand;
			# warn "$meth_call\n$read_conversion\n$genome_conversion\n";
	
			my $index;
			if ($meth_call) {
				if ($read_conversion eq 'CT' and $genome_conversion eq 'CT') { ## original top strand
					$index = 0;
					$strand = '+';
				} elsif ($read_conversion eq 'GA' and $genome_conversion eq 'CT') { ## complementary to original top strand
					$index = 1;
					$strand = '-';
				} elsif ($read_conversion eq 'GA' and $genome_conversion eq 'GA') { ## complementary to original bottom strand
					$index = 2;
					$strand = '+';
				} elsif ($read_conversion eq 'CT' and $genome_conversion eq 'GA') { ## original bottom strand
					$index = 3;
					$strand = '-';
				} else {
					die "Unexpected combination of read and genome conversion: '$read_conversion' / '$genome_conversion'\n";
				}
	
				### If the read is in SAM format we need to reverse the methylation call if the read has been reverse-complemented for the output
				if ($strand eq '-') {
					$meth_call = reverse $meth_call;
				}
				# warn "\n$meth_call\n";
	
				### IGNORE 5 PRIME: Clipping off the first <int> number of bases from the methylation call string as specified with --ignore <int>
				if ($ignore) {
					# warn "\n\n$meth_call\n";
					$meth_call = substr($meth_call,$ignore,length($meth_call)-$ignore);	
					# warn "$meth_call\n";sleep(1);

					### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly

					my @len = split (/\D+/,$cigar); # storing the length per operation
					my @ops = split (/\d+/,$cigar); # storing the operation
					shift @ops;		# remove the empty first element
					die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
					
					my @comp_cigar; # building an array with all CIGAR operations
					foreach my $index (0..$#len) {
						foreach (1..$len[$index]) {
							# print  "$ops[$index]";
							push @comp_cigar, $ops[$index];
						}
					}
					#print "original CIGAR: $cigar\n";
					#print "original CIGAR: @comp_cigar\n"; sleep(1);

					### If we are clipping off some bases at the start we need to adjust the start position of the alignments accordingly!
					if ($strand eq '+') {
						 
						my $D_count = 0; # counting all deletions that affect the ignored genomic position, i.e. Deletions, insertions and skipped regions
						my $N_count = 0; # matches and soft-clips do not count
						my $I_count = 0;

						for (1..$ignore) {
							my $op = shift @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations from the start
							#print "$_ deleted $op\n";

							while ($op eq 'D') { # repeating this for deletions (D)
								$D_count++;
								# print "$_ deleted $op (deletion)\n";
								$op = shift @comp_cigar;
							}
							while ($op eq 'N') { # repeating this for skipped regions (N)
								$N_count++;
								# print "$_ deleted $op (skipped region)\n";
								$op = shift @comp_cigar;
							}
							if ($op eq 'I') { # adjusting the genomic position for insertions (I)
								$I_count++;
							}
						}
						$start += $ignore + $D_count + $N_count - $I_count;
						# print "start $start\t ignore: $ignore\t D count: $D_count N count: $N_count I_count: $I_count\n";
					}
					elsif ($strand eq '-') {

						for (1..$ignore) {
							my $op = pop @comp_cigar; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
							while ($op eq 'D') { # repeating this for deletions (D)
								$op = pop @comp_cigar;
							}
							while ($op eq 'N') { # repeating this for skipped regions (spliced reads) (N)
								$op = pop @comp_cigar;
							}
						}

						### For reverse strand alignments we need to determine the number of matching bases (M),
						### deletions (D) or skipped regions (N) in the read from the CIGAR
						### string to be able to work out the starting position of the read which is on the 3' end of the sequence
						my $MDN_count = 0;	# counting all operations that affect the genomic position, i.e. M, N and D. Insertions or soft-clips do not affect the start position
						foreach (@comp_cigar) {
							++$MDN_count if ($_ eq 'M' or $_ eq 'D' or $_ eq 'N');
						}
						$start += $MDN_count - 1;
					}
				
					### reconstituting shortened CIGAR string
					my $new_cigar;
					my $count = 0;
					my $last_op;
					# print "ignore adjusted: @comp_cigar\n";
					foreach my $op (@comp_cigar) {
						unless (defined $last_op){
							$last_op = $op;
							++$count;
							next;
						}
						if ($last_op eq $op) {
							++$count;
						}
						else {
							$new_cigar .= "$count$last_op";
							$last_op = $op;
							$count = 1;
						}
					}
					$new_cigar .= "$count$last_op"; # appending the last operation and count
					$cigar = $new_cigar;
					# print "ignore adjusted scalar: $cigar\n"; sleep(1);
				}
	
				#######################
				###  INGORE 3' END  ###
				#######################
	
				# Clipping off the last <int> number of bases from the methylation call string as specified with --ignore_3prime <int>
				if ($ignore_3prime) {
					# warn "$meth_call\n";
					$meth_call = substr($meth_call,0, (length($meth_call)) - $ignore_3prime);	
					# warn "$meth_call\n";sleep(1);
		
					### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly

					my @len = split (/\D+/,$cigar); # storing the length per operation
					my @ops = split (/\d+/,$cigar); # storing the operation
					shift @ops;		# remove the empty first element
					die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
		
					my @comp_cigar; # building an array with all CIGAR operations
					foreach my $index (0..$#len) {
						foreach (1..$len[$index]) {
							# print  "$ops[$index]";
							push @comp_cigar, $ops[$index];
						}
					}

					# print "original CIGAR: $cigar\n";
					# print join ("",@comp_cigar),"\n";

					### If we are clipping off some bases at the end we might have to adjust the start position of the alignments accordingly
					if ($strand eq '+') {

						### clipping the 3' end does not affect the starting position of forward strand alignments
						# ignore 5' has already been taken care of at this stage, if relevant at all
				
						for (1..$ignore_3prime) {
							my $op = pop @comp_cigar; # adjusting composite CIGAR string by removing $ignore_3prime operations from the end
							# print join ("",@comp_cigar),"\n";
							# print "$_ deleted $op from 3' end\n";
							while ($op eq 'D') { # repeating this for deletions (D)
								# print join ("",@comp_cigar),"\n";
								# print "$_ deleted $op from 3' end (deletion)\n";
								$op = pop @comp_cigar;
							}
							while ($op eq 'N') { # repeating this for skipped regions (N)
								# print join ("",@comp_cigar),"\n";
								# print "$_ deleted $op from 3' end (skipped region)\n";
								$op = pop @comp_cigar;
							}
						}
						# print "Final truncated CIGAR string:\n";
						# print join ("",@comp_cigar),"\n";
						# print "start $start\t ignore_3prime: $ignore_3prime\t D count: $D_count I_count: $I_count\n";
					}
					elsif ($strand eq '-') {

						my $D_count = 0; # counting all deletions that affect the ignored genomic position, i.e. Deletions, insertions and skipped regions
						my $N_count = 0;
						my $I_count = 0;
	
						for (1..$ignore_3prime) {
							my $op = shift @comp_cigar; # adjusting composite CIGAR string by removing $ignore_3prime operations, here the first value of the array
							while ($op eq 'D') { # repeating this for deletions (D)
								$D_count++;
								$op = shift @comp_cigar;
							}
							while ($op eq 'N') { # repeating this for skipped regions (N)
								$N_count++;
								$op = shift @comp_cigar;
							}
							if ($op eq 'I') { # adjusting the genomic position for insertions (I)
								$I_count++;
							}
						}
	
						# here we need to discriminate if the start has been adjusted because of --ignore or not
						if ($ignore){
							# the start position has already been modified for --ignore already, so we don't have to adjust the position again now
						}
						else{
							# Here we need to add the length ignore_3prime to the read starting position
							#  adjustment of the true starting position of this reverse read will take place later in the methylation extraction step
							$start += $ignore_3prime + $D_count + $N_count - $I_count;
						}
					}
	
					### reconstituting shortened CIGAR string
					my $new_cigar;
					my $count = 0;
					my $last_op;
					# print "ignore_3prime adjusted:\n"; print join ("",@comp_cigar),"\n";
					foreach my $op (@comp_cigar) {
						unless (defined $last_op){
							$last_op = $op;
							++$count;
							next;
						}
						if ($last_op eq $op) {
							++$count;
						} 
						else {
							$new_cigar .= "$count$last_op";
							$last_op = $op;
							$count = 1;
						}
					}
					$new_cigar .= "$count$last_op"; # appending the last operation and count
					$cigar = $new_cigar;
					# print "ignore_3prime adjusted scalar: $cigar\n";
				}
			}
			### printing out the methylation state of every C in the read
			print_individual_C_methylation_states_single_end($meth_call,$chrom,$start,$id,$strand,$index,$cigar);
	
			++$methylation_call_strings_processed; # 1 per single-end result
		}
	}
	### PROCESSING PAIRED-END RESULT FILES
	elsif ($paired) {
	    # Bismark paired-end BAM/SAM output format (default)
		while (<IN>) {
			chomp;
			### SAM format can either start with header lines (starting with @) or start with alignments directly
			if (/^\@/) {	     # skipping header lines (starting with @)
				warn "skipping SAM header line:\t$_\n" unless ($process_id == 0); # no additional warnings for child procesess
				next;
			}
	
			++$line_count;
			warn "Processed lines: $line_count\n" if ($line_count%500000==0);
		
			if ( ($line_count - $offset)%$multicore == 0){
				# warn "line count: $line_count\noffset: $offset\n";	
				# warn "Modulus: ",($line_count - $offset)%$multicore,"\n";
				# warn "processing this line $line_count (processID: $process_id with \$offset $offset)\n";
			}
			else{
				# warn "skipping line $line_count for processID: $process_id with \$offset $offset)\n";
				$_ = <IN>; # reading and skipping another line since this is the paired-end read
				next;
			}

			# example paired-end reads in SAM format (2 consecutive lines)
			# 1_R1/1	67	5	103172224	255	40M	=	103172417	233	AATATTTTTTTTATTTTAAAATGTGTATTGATTTAAATTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:4	XX:Z:4T1T24TT7	XM:Z:....h.h........................hh.......	XR:Z:CT	XG:Z:CT
			# 1_R1/2	131	5	103172417	255	40M	=	103172224	-233	TATTTTTTTTTAGAGTATTTTTTAATGGTTATTAGATTTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:6	XX:Z:T5T1T9T9T7T3	XM:Z:h.....h.h.........h.........h.......h...	XR:Z:GA	XG:Z:CT
			
			my ($id_1,$chrom,$start_read_1,$cigar_1) = (split("\t"))[0,2,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression
			my $meth_call_1;
			my $first_read_conversion;
			my $genome_conversion;
		
			while ( /(XM|XR|XG):Z:([^\t]+)/g ) {
				my $tag = $1;
				my $value = $2;

				if ($tag eq "XM") {
					$meth_call_1 = $value;
					$meth_call_1 =~ s/\r//;
				} elsif ($tag eq "XR") {
					$first_read_conversion = $value;
					$first_read_conversion =~ s/\r//;
				} elsif ($tag eq "XG") {
					$genome_conversion = $value;
					$genome_conversion =~ s/\r//;
				}
			}

			$_ = <IN>;		# reading in the paired read
			chomp;
		
			my ($id_2,$start_read_2,$cigar_2) = (split("\t"))[0,3,5]; ### detecting the following SAM flags in case the SAM entry was shuffled by CRAM or Goby compression/decompression

			my $meth_call_2;
			my $second_read_conversion;
		
			while ( /(XM|XR):Z:([^\t]+)/g ) {
				my $tag = $1;
				my $value = $2;

				if ($tag eq "XM") {
					$meth_call_2 = $value;
					$meth_call_2 =~ s/\r//;
				} elsif ($tag eq "XR") {
					$second_read_conversion = $value;
					$second_read_conversion = s/\r//;
				}
			}
		
			# < version 0.7.6 $genome_conversion =~ s/^XG:Z://;	
			# chomp $genome_conversion; # in case it captured a new line character	# already removed

			# print join ("\t",$meth_call_1,$meth_call_2,$first_read_conversion,$second_read_conversion,$genome_conversion),"\n";

			my $index;
			my $strand;

			if ($first_read_conversion eq 'CT' and $genome_conversion eq 'CT') {
			  $index = 0;		## this is OT
			  $strand = '+';
			} elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'CT') {
			  $index = 1;		## this is CTOT
			  $strand = '-';
			} elsif ($first_read_conversion eq 'GA' and $genome_conversion eq 'GA') {
			  $index = 2;		## this is CTOB
			  $strand = '+';
			} elsif ($first_read_conversion eq 'CT' and $genome_conversion eq 'GA') {
			  $index = 3;		## this is OB
			  $strand = '-';
			} else {
			  die "Unexpected combination of read and genome conversion: $first_read_conversion / $genome_conversion\n";
			}

			### reversing the methylation call of the read that was reverse-complemented
			if ($strand eq '+') {
				$meth_call_2 = reverse $meth_call_2;
			} 
			else {
				$meth_call_1 = reverse $meth_call_1;
			}
			# warn "\n$meth_call_1\n$meth_call_2\n";

			if ($meth_call_1 and $meth_call_2) {

				my $end_read_1;
		
				### READ 1
				my @len_1 = split (/\D+/,$cigar_1); # storing the length per operation
				my @ops_1 = split (/\d+/,$cigar_1); # storing the operation
				shift @ops_1;		# remove the empty first element

				die "CIGAR string contained a non-matching number of lengths and operations: $cigar_1\n".join(" ",@len_1)."\n".join(" ",@ops_1)."\n" unless (scalar @len_1 == scalar @ops_1);
		
				my @comp_cigar_1; # building an array with all CIGAR operations
				foreach my $index (0..$#len_1) {
					foreach (1..$len_1[$index]) {
						# print  "$ops_1[$index]";
						push @comp_cigar_1, $ops_1[$index];
					}
				}
				# print "original CIGAR read 1: $cigar_1\n";
				# print "original CIGAR read 1: @comp_cigar_1\n"; 

				### READ 2
				my @len_2 = split (/\D+/,$cigar_2); # storing the length per operation
				my @ops_2 = split (/\d+/,$cigar_2); # storing the operation
				shift @ops_2;		# remove the empty first element
				die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len_2 == scalar @ops_2);
				my @comp_cigar_2; # building an array with all CIGAR operations for read 2
				foreach my $index (0..$#len_2) {
					foreach (1..$len_2[$index]) {
						# print  "$ops_2[$index]";
						push @comp_cigar_2, $ops_2[$index];
					}
				}
				# print "original CIGAR read 2: $cigar_2\n";
				# print "original CIGAR read 2: @comp_cigar_2\n";	sleep(3);
		
				##################################	
				###  IGNORE BASES FROM 5' END  ###
				##################################

				if ($ignore) {
					### Clipping off the first <int> number of bases from the methylation call string as specified with '--ignore <int>' for read 1	
					### the methylation calls have already been reversed where necessary

					if ( (length($meth_call_1) - $ignore) <= 0){
						# next; # skipping this read entirely if the read is shorter than the portion to be ignored
						$meth_call_1 = undef; # will skip this read entirely since the read is shorter than the portion to be ignored
					}
					else {
						$meth_call_1 = substr($meth_call_1,$ignore,length($meth_call_1)-$ignore);
			
						if ($strand eq '+') {
			
							### if the (read 1) strand information is '+', read 1 needs to be trimmed from the start
							my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1,
							my $N_count_1 = 0; # i.e.insertions, deletions and skipped regions
							my $I_count_1 = 0;
			
							for (1..$ignore) {
								my $op = shift @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore operations from the start
								# print "$_ deleted $op\n";
			
								while ($op eq 'D') { # repeating this for deletions (D)
									$D_count_1++;
									# print "$_ deleted $op (deletion\n";
									$op = shift @comp_cigar_1;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									$N_count_1++;
									# print "$_ deleted $op (skipped region)\n";
									$op = shift @comp_cigar_1;
								}
								if ($op eq 'I') { # adjusting the genomic position for insertions (I)
									$I_count_1++;
								}
							}
			
							$start_read_1 += $ignore + $D_count_1 + $N_count_1 - $I_count_1;
							# print "start read 1 $start_read_1\t ignore: $ignore\t D count 1: $D_count_1\tN count 1: $N_count_1\tI_count 1: $I_count_1\n";
							# the start position of reads mapping to the reverse strand is being adjusted further below
						}
						elsif ($strand eq '-') {
			
							### if the (read 1) strand information is '-', read 1 needs to be trimmed from the back
							for (1..$ignore) {
								my $op = pop @comp_cigar_1; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
								while ($op eq 'D') { # repeating this for deletions (D)
									$op = pop @comp_cigar_1;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									$op = pop @comp_cigar_1;
								}
							}
							# the start position of reads mapping to the reverse strand is being adjusted further below
						}
					}
				}
		
				if ($ignore_r2) {
					### Clipping off the first <int> number of bases from the methylation call string as specified with '--ignore_r2 <int>' for read 2	
					### the methylation calls have already been reversed where necessary

					if ( (length($meth_call_2) - $ignore_r2) <= 0){
						# next; # skipping this read entirely if the read is shorter than the portion to be ignored # this would skip the entire read pair!
						$meth_call_2 = undef; # will skip this read entirely since the read is shorter than the portion to be ignored
					}
					else {
						$meth_call_2 = substr($meth_call_2,$ignore_r2,length($meth_call_2)-$ignore_r2);
		
						### If we are ignoring a part of the sequence we also need to adjust the cigar string accordingly
						if ($strand eq '+') {
			
							### if the (read 1) strand information is '+', read 2 needs to be trimmed from the back

							for (1..$ignore_r2) {
								my $op = pop @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the last value of the array
								while ($op eq 'D') { # repeating this for deletions (D)
									$op = pop @comp_cigar_2;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									$op = pop @comp_cigar_2;
								}
							}					
							# the start position of reads mapping to the reverse strand is being adjusted further below
						}
						elsif ($strand eq '-') {
		
							### if the (read 1) strand information is '-', read 2 needs to be trimmed from the start
							my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2,
							my $N_count_2 = 0; # i.e. deletions, insertions and skipped regions
							my $I_count_2 = 0;

							for (1..$ignore_r2) {
								my $op = shift @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
								# print "$_ deleted $op\n";
			
								while ($op eq 'D') { # repeating this for deletions (D)
									$D_count_2++;
									# print "$_ deleted $op (deletion)\n";
									$op = shift @comp_cigar_2;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									$N_count_2++;
									# print "$_ deleted $op (skipped region)\n";
									$op = shift @comp_cigar_2;
								}
								if ($op eq 'I') { # adjusting the genomic position for insertions (I)
									$I_count_2++;
								}
							}
			
							$start_read_2 += $ignore_r2 + $D_count_2 + $N_count_2 - $I_count_2;
							# print "start read 2 $start_read_2\t ignore R2: $ignore_r2\t D count 2: $D_count_2\t N count 2: $N_count_2\tI_count 2: $I_count_2\n";
						}
					}
				}
		
				if ($ignore and $meth_call_1){ # if the methylation call string is undefined at this position we don't need any new CIGAR string
		
					### reconstituting shortened CIGAR string 1
					my $new_cigar_1;
					my $count_1 = 0;
					my $last_op_1;
					# print "ignore adjusted CIGAR 1: @comp_cigar_1\n";
					foreach my $op (@comp_cigar_1) {
						unless (defined $last_op_1){
							$last_op_1 = $op;
							++$count_1;
							next;
						}
						if ($last_op_1 eq $op) {
							++$count_1;
						}
						else {
							$new_cigar_1 .= "$count_1$last_op_1";
							$last_op_1 = $op;
							$count_1 = 1;
						}
					}
					$new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
					$cigar_1 = $new_cigar_1;
					# print "ignore adjusted CIGAR 1 scalar: $cigar_1\n";
				}

				if ($ignore_r2 and $meth_call_2){ # if the methylation call string is undefined at this point we don't need any new CIGAR string

					### reconstituting shortened CIGAR string 2
					my $new_cigar_2;
					my $count_2 = 0;
					my $last_op_2;
					# print "ignore adjusted CIGAR 2: @comp_cigar_2\n";
					foreach my $op (@comp_cigar_2) {
						unless (defined $last_op_2){
							$last_op_2 = $op;
							++$count_2;
							next;
						}
						if ($last_op_2 eq $op) {
							++$count_2;
						}
						else {
							$new_cigar_2 .= "$count_2$last_op_2";
							$last_op_2 = $op;
							$count_2 = 1;
						}
					}
					$new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
					$cigar_2 = $new_cigar_2;
					# print "ignore_r2 adjusted CIGAR 2 scalar: $cigar_2\n";
				}

				###########################
				###  END IGNORE 5' END  ###
				###########################

				##################################
				###  IGNORE BASES FROM 3' END  ###
				##################################

				# print "CIGAR string before truncating 3' end (Read 1)\n";
				# print join ("",@comp_cigar_1),"\n";
			
				if ($ignore_3prime and $meth_call_1) { # if the methylation call string is undefined at this point we don't need to process the read any further

					### Clipping off the last <int> number of bases from the methylation call string as specified with '--ignore_3prime <int>' for read 1	
					### the methylation calls have already been reversed where necessary

					if ( (length($meth_call_1) - $ignore_3prime) <= 0){
						$meth_call_1 = undef; # will skip this read entirely since the read is shorter than the portion to be ignored
					}
					else {
						$meth_call_1 = substr($meth_call_1,0,length($meth_call_1) - $ignore_3prime);
						# warn "truncated meth_call 1:\n$meth_call_1\n";
				
						if ($strand eq '+') {

							### if the (read 1) strand information is '+', clipping the 3' end does not affect the starting position of forward strand alignments
							# ignore 5' has already been taken care of at this stage, if relevant at all
			
							for (1..$ignore_3prime) {
								my $op = pop @comp_cigar_1; # adjusting composite CIGAR string of read 1 by removing $ignore_3prime operations from the end
								# print "$_ deleted $op from 3' end\n";
								# print join ("",@comp_cigar_1),"\n";
							
								while ($op eq 'D') { # repeating this for deletions (D)
									# print "$_ deleted $op from 3' end (deletion)\n";
									# print join ("",@comp_cigar_1),"\n";
									$op = pop @comp_cigar_1;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									# print "$_ deleted $op from 3' end (skipped region)\n";
									# print join ("",@comp_cigar_1),"\n";
									$op = pop @comp_cigar_1;
								}
							}
			
							# print "Final truncated CIGAR string (Read 1):\n";
							# print join ("",@comp_cigar_1),"\n";
						}
						elsif ($strand eq '-') {

							my $D_count_1 = 0; # counting all deletions that affect the ignored genomic position for read 1,
							my $N_count_1 = 0; # i.e. deletions, insertions and skipped regions
							my $I_count_1 = 0;
		
							### if the (read 1) strand information is '-', the read 1 CIGAR string needs to be trimmed from the start
							for (1..$ignore_3prime) {
								my $op = shift @comp_cigar_1; # adjusting composite CIGAR string by removing $ignore_3prime operations, here the first value of the array
								# print join ("",@comp_cigar_1),"\n";
				
								while ($op eq 'D') { # repeating this for deletions (D)
									$D_count_1++;
									$op = shift @comp_cigar_1;
									# print join ("",@comp_cigar_1),"\n";
								}
								while ($op eq 'N') { # repeating this for skipped region (N)
									$N_count_1++;
									$op = shift @comp_cigar_1;
									# print join ("",@comp_cigar_1),"\n";
								}
								if ($op eq 'I') { # adjusting the genomic position for insertions (I)
									$I_count_1++;
								}
							}

							# print "Final truncated CIGAR string reverse_read:\n";
							# print join ("",@comp_cigar_1),"\n";
			
							# Here we need to add the length ignore_3prime to the read starting position
							# adjustment of the true start position of this reverse read will take place later in the methylation extraction step
							$start_read_1 += $ignore_3prime + $D_count_1 + $N_count_1 - $I_count_1;
						}
					}
				}
		
				if ($ignore_3prime_r2 and $meth_call_2) { # if the methylation call string is undefined at this point we don't need to process the read any further
		
					### Clipping off the last <int> number of bases from the methylation call string as specified with '--ignore_3prime_r2 <int>' for read 2	
					### the methylation calls have already been reversed where necessary
		
					if ( (length($meth_call_2) - $ignore_3prime_r2) <= 0){
						$meth_call_2 = undef; # will skip this read entirely since the read is shorter than the portion to be ignored
					}
					else {
						$meth_call_2 = substr($meth_call_2,0,length($meth_call_2) - $ignore_3prime_r2);
						# warn "truncated meth_call 2:\n$meth_call_2\n";
		
						### If we are ignoring a part of the sequence we also need to adjust the cigar string and the positions accordingly
		
						if ($strand eq '+') {
			
							### if the (read 1) strand information is '+', clipping the 3' end of read 2 does potentially affect the starting position of read 2 (reverse strand alignment)
							### if the (read 1) strand information is '+', read 2 needs to be trimmed from the start
							
							my $D_count_2 = 0; # counting all deletions that affect the ignored genomic position for read 2, i.e. Deletions and insertions
							my $N_count_2 = 0;
							my $I_count_2 = 0;
							
							for (1..$ignore_3prime_r2) {
								my $op = shift @comp_cigar_2; # adjusting composite CIGAR string by removing $ignore operations, here the first value of the array
								# print "$_ deleted $op from 3' end\n";
								# print join ("",@comp_cigar_2),"\n";
							
								while ($op eq 'D') { # repeating this for deletions (D)
									$D_count_2++;
									# print "$_ deleted $op from 3' end\n";
									# print join ("",@comp_cigar_2),"\n";
									$op = shift @comp_cigar_2;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									$N_count_2++;
									# print "$_ deleted $op from 3' end\n";
									# print join ("",@comp_cigar_2),"\n";
									$op = shift @comp_cigar_2;
								}

								if ($op eq 'I') { # adjusting the genomic position for insertions (I)
									$I_count_2++;
								}
							}
							
							# print "Final truncated CIGAR string 2 (+ alignment):\n";
							# print join ("",@comp_cigar_2),"\n";
							
							# Here we need to add the length ignore_3prime_r2 to the read starting position
							# adjustment of the true start position of this reverse read will take place later in the methylation extraction step
							$start_read_2 += $ignore_3prime_r2 + $D_count_2 + $N_count_2 - $I_count_2;
							
							# print "start read 2 $start_read_2\t ignore R2: $ignore_r2\t D count 2: $D_count_2\tI_count 2: $I_count_2\n";
						}
						elsif ($strand eq '-') {
							### if the (read 1) strand information is '-', clipping the 3' end of read 2 does not affect its starting position (forward strand alignment)
							### ignore_r2 5' has already been taken care of at this stage, if relevant at all
			
							### if the (read 1) strand information is '-', read 2 needs to be trimmed from the end
			
							for (1..$ignore_3prime_r2) {
								my $op = pop @comp_cigar_2; # adjusting composite CIGAR string of read 2 by removing $ignore operations from the start
								# print "$_ deleted $op\n";
								# print join ("",@comp_cigar_2),"\n";
			
								while ($op eq 'D') { # repeating this for deletions (D)
									# print "$_ deleted $op (deletion)\n";
									# print join ("",@comp_cigar_2),"\n";
									$op = pop @comp_cigar_2;
								}
								while ($op eq 'N') { # repeating this for skipped regions (N)
									# print "$_ deleted $op (skipped region)\n";
									# print join ("",@comp_cigar_2),"\n";
									$op = pop @comp_cigar_2;
								}
							}
			
							# print "Final truncated CIGAR string 2 (- alignment):\n";
							# print join ("",@comp_cigar_2),"\n";
			
						}
					}
				}
		
				if ($ignore_3prime and $meth_call_1){ # if the methylation call string is undefined at this point we don't need any new CIGAR string
		
					### reconstituting shortened CIGAR string 1
					my $new_cigar_1;
					my $count_1 = 0;
					my $last_op_1;
					# print "ignore_3prime adjusted CIGAR 1: @comp_cigar_1\n";
					foreach my $op (@comp_cigar_1) {
						unless (defined $last_op_1){
							$last_op_1 = $op;
							++$count_1;
							next;
						}
						if ($last_op_1 eq $op) {
							++$count_1;
						}
						else {
							$new_cigar_1 .= "$count_1$last_op_1";
							$last_op_1 = $op;
							$count_1 = 1;
						}
					}
					$new_cigar_1 .= "$count_1$last_op_1"; # appending the last operation and count
					$cigar_1 = $new_cigar_1;
					# warn "ignore_3prime adjusted CIGAR 1 scalar: $cigar_1\n";
				}

				if ($ignore_3prime_r2 and $meth_call_2){ # if the methylation call string is undefined at this point we don't need any new CIGAR string
		
					### reconstituting shortened CIGAR string 2
					my $new_cigar_2;
					my $count_2 = 0;
					my $last_op_2;
					# print "ignore_3prime_r2 adjusted CIGAR 2: @comp_cigar_2\n";
					foreach my $op (@comp_cigar_2) {
						unless (defined $last_op_2){
							$last_op_2 = $op;
							++$count_2;
							next;
						}
						if ($last_op_2 eq $op) {
							++$count_2;
						}
						else {
							$new_cigar_2 .= "$count_2$last_op_2";
							$last_op_2 = $op;
							$count_2 = 1;
						}
					}
					$new_cigar_2 .= "$count_2$last_op_2"; # appending the last operation and count
					$cigar_2 = $new_cigar_2;
					# warn "ignore_3prime_r2 adjusted CIGAR 2 scalar: $cigar_2\n";
				}

				###########################
				###  END IGNORE 3' END  ###
				###########################


				### Adjusting CIGAR string and starting position of reads in reverse orientation which we will pass to the extraction subroutine later on
		
				if ($strand eq '+') {
					### adjusting the start position for all reads mapping to the reverse strand, in this case read 2
					@comp_cigar_2  = reverse@comp_cigar_2; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
					# print "reverse: @comp_cigar_2\n";
		
					my $MDN_count_1 = 0;
					# Matching bases, deletions or skipped regions affect the genomic position of the 3' ends of reads, insertions don't
					foreach (@comp_cigar_1) {
						++$MDN_count_1 if ( ($_ eq 'M') or ($_ eq 'D') or ($_ eq 'N') ); 
					}

					my $MDN_count_2 = 0;
					# Matching bases, deletions or skipped regions affect the genomic position of the 3' ends of reads, insertions don't
					foreach (@comp_cigar_2) {
						++$MDN_count_2 if ( ($_ eq 'M') or ($_ eq 'D') or ($_ eq 'N') ); 
					}

					$end_read_1 = $start_read_1 + $MDN_count_1 - 1;
					$start_read_2 += $MDN_count_2 - 1; ## Passing on the start position on the reverse strand
				}
				else {
					### adjusting the start position for all reads mapping to the reverse strand, in this case read 1
		
					@comp_cigar_1  = reverse@comp_cigar_1; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
					# print "reverse: @comp_cigar_1\n";

					my $MDN_count_1 = 0;
					# Matching bases, deletions or skipped regions affect the genomic position of the 3' ends of reads, insertions don't
					foreach (@comp_cigar_1) {
						++$MDN_count_1 if ( ($_ eq 'M') or ($_ eq 'D') or ($_ eq 'N') );
					}

					$end_read_1 = $start_read_1;	
					$start_read_1 +=  $MDN_count_1 - 1; ### Passing on the start position on the reverse strand
				}

				### 26 07 2016: Adding a check to see if ID1 and ID2 are the same. If this got out of sync the following extractions will contain wrong strand identification and overlap detection
				if ($id_1 eq $id_2){
					# warn "IDs are the same, fine.\n";
				}
				else{ ### in previous versions of Bismark we appended /1 and /2 to the read IDs for easier eyeballing which read is which. These tags need to be removed first
					my $id_1_trunc = $id_1;
					$id_1_trunc =~ s/\/1$//;
					my $id_2_trunc = $id_2;
					$id_2_trunc =~ s/\/2$//;
			  
					unless ($id_1_trunc eq $id_2_trunc){
						die "[FATAL ERROR:] The IDs of Read 1 ($id_1) and Read 2 ($id_2) are not the same. This might be the result of sorting the BAM files by chromosomal position or merging several files with Samtools sort, and this is not compatible with correct methylation extraction. Please use an unsorted file instead or sort the file by name using the command 'samtools sort -n'. Paired-end files may be merged properly (without risking this error) using either 'samtools merge -n' or 'samtools cat'.\n\n";
					}
				}
		  
				if ($strand eq '+') {
					## we first pass the first read which is in + orientation on the forward strand; the last value is the read identity
					print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'+',$index,0,0,$cigar_1,1);
		
					# we next pass the second read which is in - orientation on the reverse strand
					### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we can stop extracting methylation calls from read 2
					print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'-',$index,$no_overlap,$end_read_1,$cigar_2,2);
				}
				else {
					## we first pass the first read which is in - orientation on the reverse strand
					print_individual_C_methylation_states_paired_end_files($meth_call_1,$chrom,$start_read_1,$id_1,'-',$index,0,0,$cigar_1,1);
			  
					# we next pass the second read which is in + orientation on the forward strand
					### if --no_overlap was specified we also pass the end of read 1. If read 2 starts to overlap with read 1 we will stop extracting methylation calls from read 2
					print_individual_C_methylation_states_paired_end_files($meth_call_2,$chrom,$start_read_2,$id_2,'+',$index,$no_overlap,$end_read_1,$cigar_2,2);
				}
		
				$methylation_call_strings_processed += 2; # paired-end = 2 methylation call strings
			}	
		}
    }
	else {
		die "Single-end or paired-end reads not specified properly\n";
	}

	$counting{sequences_count}          = $line_count;
	$counting{methylation_call_strings} = $methylation_call_strings_processed;

	if ($multicore == 1){
		print_splitting_report ();
	}
	elsif ($multicore > 1){
		print_splitting_report_multicore($report_filename,$offset,$line_count,$methylation_call_strings_processed);
		print_mbias_report_multicore($report_filename,$offset,$line_count,$methylation_call_strings_processed);
	}

	return ($process_id,\@pids,$report_filename);

}



sub print_splitting_report{

  ### Calculating methylation percentages if applicable
  warn "\nProcessed $counting{sequences_count} lines in total\n";
  warn "Total number of methylation call strings processed: $counting{methylation_call_strings}\n\n";
  if ($report) {
    print REPORT "\nProcessed $counting{sequences_count} lines in total\n";
    print REPORT "Total number of methylation call strings processed: $counting{methylation_call_strings}\n\n";
  }

  my $percent_meCpG;
  if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
    $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
  }

  my $percent_meCHG;
  if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
    $percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
  }

  my $percent_meCHH;
  if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
    $percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
  }

  my $percent_non_CpG_methylation;
  if ($merge_non_CpG){
    if ( ($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
      $percent_non_CpG_methylation = sprintf("%.1f",100* ( $counting{total_meCHH_count}+$counting{total_meCHG_count} ) / ( $counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count} ) );
    }
  }

  if ($report){
    ### detailed information about Cs analysed
    print REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";

    my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
    print REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";

    print REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
    print REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
    print REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";

    print REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
    print REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
    print REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";

    ### calculating methylated CpG percentage if applicable
    if ($percent_meCpG){
      print REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
    }
    else{
      print REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
    }

    ### 2-Context Output
    if ($merge_non_CpG){
      if ($percent_non_CpG_methylation){
	print REPORT "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
      }
      else{
	print REPORT "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
      }
    }

    ### 3 Context Output
    else{
      ### calculating methylated CHG percentage if applicable
      if ($percent_meCHG){
	print REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
      }
      else{
	print REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
      }

      ### calculating methylated CHH percentage if applicable
      if ($percent_meCHH){
	print REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
      }
      else{
	print REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
      }
    }
  }

  ### detailed information about Cs analysed for on-screen report
  warn "Final Cytosine Methylation Report\n",'='x33,"\n";

  my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
  warn "Total number of C's analysed:\t$total_number_of_C\n\n";

  warn "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
  warn "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
  warn"Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";

  warn "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
  warn "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
  warn"Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";

  ### printing methylated CpG percentage if applicable
  if ($percent_meCpG){
    warn "C methylated in CpG context:\t${percent_meCpG}%\n";
  }
  else{
    warn "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
  }

  ### 2-Context Output
  if ($merge_non_CpG){
    if ($percent_non_CpG_methylation){
      warn "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
    }
    else{
      warn "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
    }
  }

  ### 3-Context Output
  else{
    ### printing methylated CHG percentage if applicable
    if ($percent_meCHG){
      warn "C methylated in CHG context:\t${percent_meCHG}%\n";
    }
    else{
      warn "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
    }

    ### printing methylated CHH percentage if applicable
    if ($percent_meCHH){
      warn "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
    }
    else{
      warn "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
    }
  }
}

###

sub print_splitting_report_multicore{

  my ($report_filename,$offset,$line_count,$meth_call_strings) = @_;

  # warn "\$report_filename is $report_filename\n";
  my $special_report = $report_filename.".$offset";

  open (SPECIAL_REPORT,'>',$special_report) or die $!;
  # warn "line count\t$line_count\n";
  # warn "meth call strings\t$meth_call_strings\n";

  print SPECIAL_REPORT "line count\t$line_count\n";
  print SPECIAL_REPORT "meth call strings\t$meth_call_strings\n";

  ### Calculating methylation percentages if applicable
  my $percent_meCpG;
  if (($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}) > 0){
    $percent_meCpG = sprintf("%.1f",100*$counting{total_meCpG_count}/($counting{total_meCpG_count}+$counting{total_unmethylated_CpG_count}));
  }

  my $percent_meCHG;
  if (($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
    $percent_meCHG = sprintf("%.1f",100*$counting{total_meCHG_count}/($counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}));
  }

  my $percent_meCHH;
  if (($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}) > 0){
    $percent_meCHH = sprintf("%.1f",100*$counting{total_meCHH_count}/($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}));
  }

  my $percent_non_CpG_methylation;
  if ($merge_non_CpG){
    if ( ($counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count}) > 0){
      $percent_non_CpG_methylation = sprintf("%.1f",100* ( $counting{total_meCHH_count}+$counting{total_meCHG_count} ) / ( $counting{total_meCHH_count}+$counting{total_unmethylated_CHH_count}+$counting{total_meCHG_count}+$counting{total_unmethylated_CHG_count} ) );
    }
  }

  if ($report){

    ### detailed information about Cs analysed
    print SPECIAL_REPORT "Final Cytosine Methylation Report\n",'='x33,"\n";

    my $total_number_of_C = $counting{total_meCHG_count}+$counting{total_meCHH_count}+$counting{total_meCpG_count}+$counting{total_unmethylated_CHG_count}+$counting{total_unmethylated_CHH_count}+$counting{total_unmethylated_CpG_count};
    print SPECIAL_REPORT "Total number of C's analysed:\t$total_number_of_C\n\n";

    print SPECIAL_REPORT "Total methylated C's in CpG context:\t$counting{total_meCpG_count}\n";
    print SPECIAL_REPORT "Total methylated C's in CHG context:\t$counting{total_meCHG_count}\n";
    print SPECIAL_REPORT "Total methylated C's in CHH context:\t$counting{total_meCHH_count}\n\n";

    print SPECIAL_REPORT "Total C to T conversions in CpG context:\t$counting{total_unmethylated_CpG_count}\n";
    print SPECIAL_REPORT "Total C to T conversions in CHG context:\t$counting{total_unmethylated_CHG_count}\n";
    print SPECIAL_REPORT "Total C to T conversions in CHH context:\t$counting{total_unmethylated_CHH_count}\n\n";

    ### calculating methylated CpG percentage if applicable
    if ($percent_meCpG){
      print SPECIAL_REPORT "C methylated in CpG context:\t${percent_meCpG}%\n";
    }
    else{
      print SPECIAL_REPORT "Can't determine percentage of methylated Cs in CpG context if value was 0\n";
    }

    ### 2-Context Output
    if ($merge_non_CpG){
      if ($percent_non_CpG_methylation){
	print SPECIAL_REPORT "C methylated in non-CpG context:\t${percent_non_CpG_methylation}%\n\n\n";
      }
      else{
	print SPECIAL_REPORT "Can't determine percentage of methylated Cs in non-CpG context if value was 0\n\n\n";
      }
    }

    ### 3 Context Output
    else{
      ### calculating methylated CHG percentage if applicable
      if ($percent_meCHG){
	print SPECIAL_REPORT "C methylated in CHG context:\t${percent_meCHG}%\n";
      }
      else{
	print SPECIAL_REPORT "Can't determine percentage of methylated Cs in CHG context if value was 0\n";
      }

      ### calculating methylated CHH percentage if applicable
      if ($percent_meCHH){
	print SPECIAL_REPORT "C methylated in CHH context:\t${percent_meCHH}%\n\n\n";
      }
      else{
	print SPECIAL_REPORT "Can't determine percentage of methylated Cs in CHH context if value was 0\n\n\n";
      }
    }
  }
  close SPECIAL_REPORT or warn "Failed to close filehandle for individual report $special_report\n";
}


###

### INDIVIDUAL M-BIAS REPORTS

sub print_mbias_report_multicore{

	my ($report_filename,$offset,$line_count,$meth_call_strings) = @_;

	# warn "\$report_filename is $report_filename\n";
	my $special_mbias_report = $report_filename.".${offset}.mbias";

	open (SPECIAL_MBIAS,'>',$special_mbias_report) or die $!;

	# determining maximum read length
	my $max_length_1 = 0;
	my $max_length_2 = 0;

	foreach my $context (keys %mbias_1){
		foreach my $pos (sort {$a<=>$b} keys %{$mbias_1{$context}}){
			$max_length_1 = $pos unless ($max_length_1 >= $pos);
		}
	}
	if ($paired){
		foreach my $context (keys %mbias_2){
			foreach my $pos (sort {$a<=>$b} keys %{$mbias_2{$context}}){
				$max_length_2 = $pos unless ($max_length_2 >= $pos);
			}
		}
	}

	if ($single){
		# warn "Determining maximum read length for M-Bias plot\n";
		# warn "Maximum read length of Read 1: $max_length_1\n\n";
	}	
	else{
		# warn "Determining maximum read lengths for M-Bias plots\n";
		# warn "Maximum read length of Read 1: $max_length_1\n";
		# warn "Maximum read length of Read 2: $max_length_2\n\n";
	}

  foreach my $context (qw(CpG CHG CHH)){

    if ($paired){
      print SPECIAL_MBIAS "$context context (R1)\n================\n";
    }
    else{
      print SPECIAL_MBIAS "$context context\n===========\n";
    }
    print SPECIAL_MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";

    foreach my $pos (1..$max_length_1){

      unless (defined $mbias_1{$context}->{$pos}->{meth}){
	$mbias_1{$context}->{$pos}->{meth} = 0;
      }
      unless (defined $mbias_1{$context}->{$pos}->{un}){
	$mbias_1{$context}->{$pos}->{un} = 0;
      }

      my $percent = '';
      if (($mbias_1{$context}->{$pos}->{meth} + $mbias_1{$context}->{$pos}->{un}) > 0){
	$percent = sprintf("%.2f",$mbias_1{$context}->{$pos}->{meth} * 100/ ( $mbias_1{$context}->{$pos}->{meth} + $mbias_1{$context}->{$pos}->{un}) );
      }
      my $coverage = $mbias_1{$context}->{$pos}->{un} + $mbias_1{$context}->{$pos}->{meth};

      print SPECIAL_MBIAS "$pos\t$mbias_1{$context}->{$pos}->{meth}\t$mbias_1{$context}->{$pos}->{un}\t$percent\t$coverage\n";
    }
    print SPECIAL_MBIAS "\n";
  }

  if ($paired){

    foreach my $context (qw(CpG CHG CHH)){

      print SPECIAL_MBIAS "$context context (R2)\n================\n";
      print SPECIAL_MBIAS "position\tcount methylated\tcount unmethylated\t% methylation\tcoverage\n";

      foreach my $pos (1..$max_length_2){
	
	unless (defined $mbias_2{$context}->{$pos}->{meth}){
	  $mbias_2{$context}->{$pos}->{meth} = 0;
	}
	unless (defined $mbias_2{$context}->{$pos}->{un}){
	  $mbias_2{$context}->{$pos}->{un} = 0;
	}

	my $percent = '';
	if (($mbias_2{$context}->{$pos}->{meth} + $mbias_2{$context}->{$pos}->{un}) > 0){
	  $percent = sprintf("%.2f",$mbias_2{$context}->{$pos}->{meth} * 100/ ($mbias_2{$context}->{$pos}->{meth} + $mbias_2{$context}->{$pos}->{un}) );
	}
	my $coverage = $mbias_2{$context}->{$pos}->{un} + $mbias_2{$context}->{$pos}->{meth};

	print SPECIAL_MBIAS "$pos\t$mbias_2{$context}->{$pos}->{meth}\t$mbias_2{$context}->{$pos}->{un}\t$percent\t$coverage\n";
      }	
    }
  }

  close SPECIAL_MBIAS or warn "Failed to close filehandle for individual M-bias report $special_mbias_report\n";
}


###


sub print_individual_C_methylation_states_paired_end_files{

    my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$no_overlap,$end_read_1,$cigar,$read_identity) = @_;

    unless (defined $meth_call) {
		return; # skip this read
    }

	### we will use the read identity for the M-bias plot to discriminate read 1 and read 2
	die "Read identity was neither 1 nor 2: $read_identity\n\n" unless ($read_identity == 1 or $read_identity == 2);

	my @methylation_calls = split(//,$meth_call);

	#################################################################
	### . for bases not involving cytosines                       ###
	### X for methylated C in CHG context (was protected)         ###
	### x for not methylated C in CHG context (was converted)     ###
	### H for methylated C in CHH context (was protected)         ###
	### h for not methylated C in CHH context (was converted)     ###
	### Z for methylated C in CpG context (was protected)         ###
	### z for not methylated C in CpG context (was converted)     ###
	### U for methylated C in Unknown context (was protected)     ###
	### u for not methylated C in Unknown context (was converted) ###
	#################################################################

	my $methyl_CHG_count = 0;	
	my $methyl_CHH_count = 0;
	my $methyl_CpG_count = 0;
	my $unmethylated_CHG_count = 0;
	my $unmethylated_CHH_count = 0;
	my $unmethylated_CpG_count = 0;

	my $pos_offset = 0; # this is only relevant for SAM reads with insertions or deletions
	my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels
	my @comp_cigar;
	
	### Checking whether the CIGAR string is a linear genomic match or whether if requires indel processing
	if ($cigar =~ /^\d+M$/){
		# this check speeds up the extraction process by up to 60%!!!
	}
	else{ # parsing CIGAR string
		my @len;
		my @ops;
		@len = split (/\D+/,$cigar); # storing the length per operation
		@ops = split (/\d+/,$cigar); # storing the operation
		shift @ops; # remove the empty first element

		die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);

		foreach my $index (0..$#len){
			foreach (1..$len[$index]){
				# print  "$ops[$index]";
				push @comp_cigar, $ops[$index];
			}
		}
		# warn "\nDetected CIGAR string: $cigar\n";
		# warn "Length of methylation call: ",length $meth_call,"\n";
		# warn "number of operations: ",scalar @ops,"\n";
		# warn "number of length digits: ",scalar @len,"\n\n";
		# print @comp_cigar,"\n";
		# print "$meth_call\n\n";
		# sleep (1);
	}

	if ($strand eq '-') {

		### the  CIGAR string needs to be reversed, the methylation call has already been reversed above
		if (@comp_cigar){
			@comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
		}
		#  print "reverse CIGAR string: @comp_cigar\n";

		### the start position of paired-end files has already been corrected, see above
	}

	### THIS IS AN OPTIONAL 2-CONTEXT (CpG and non-CpG) SECTION IF --merge_non_CpG was specified

	if ($merge_non_CpG) {
		if ($no_overlap) { # this has to be read 2...

			### single-file CpG and non-CpG context output
			if ($full) {
				if ($strand eq '+') {
					for my $index (0..$#methylation_calls) {
			
						if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
							my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
							# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
							$cigar_offset += $cigar_mod;
							$pos_offset += $pos_mod;
						}

						### Returning as soon as the methylation calls start overlapping
						if ($start+$index+$pos_offset >= $end_read_1) {
							return;
						}
			
						if ($methylation_calls[$index] eq 'X') {
							$counting{total_meCHG_count}++;
							print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'x') {
							$counting{total_unmethylated_CHG_count}++;
							print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'Z') {
							$counting{total_meCpG_count}++;
							print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'z') {
							$counting{total_unmethylated_CpG_count}++;
							print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'H') {
							$counting{total_meCHH_count}++;
							print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{meth}++;	
							}
						}
						elsif ($methylation_calls[$index] eq 'h') {
							$counting{total_unmethylated_CHH_count}++;
							print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq '.'){}
						elsif (lc$methylation_calls[$index] eq 'u'){}
						else{
							die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
						}
					}
				}
				elsif ($strand eq '-') {
					for my $index (0..$#methylation_calls) {
				
						if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
							# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
							my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);		
							$cigar_offset += $cigar_mod;
							$pos_offset += $pos_mod;
						}
					
						### Returning as soon as the methylation calls start overlapping
						if ($start-$index+$pos_offset <= $end_read_1) {
						  return;
						}
				
						if ($methylation_calls[$index] eq 'X') {
						$counting{total_meCHG_count}++;
						print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'x') {
							$counting{total_unmethylated_CHG_count}++;
							print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'Z') {
							$counting{total_meCpG_count}++;
							print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'z') {
							$counting{total_unmethylated_CpG_count}++;
							print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'H') {
							$counting{total_meCHH_count}++;
							print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'h') {
							$counting{total_unmethylated_CHH_count}++;
							print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq '.') {}
						elsif (lc$methylation_calls[$index] eq 'u'){}
						else{
							die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
						}
					}
				} 
				else {
				  die "The read orientation was neither + nor -: '$strand'\n";
				}
			}
			### strand-specific methylation output
			else {
				if ($strand eq '+') {
					for my $index (0..$#methylation_calls) {

						if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
							my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
							print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
							$cigar_offset += $cigar_mod;
							$pos_offset += $pos_mod;
						}

						### Returning as soon as the methylation calls start overlapping
						if ($start+$index+$pos_offset >= $end_read_1) {
							return;
						}

						if ($methylation_calls[$index] eq 'X') {
							$counting{total_meCHG_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'x') {
							$counting{total_unmethylated_CHG_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'Z') {
							$counting{total_meCpG_count}++;
							print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'z') {
							$counting{total_unmethylated_CpG_count}++;
							print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'H') {
							$counting{total_meCHH_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'h') {
							$counting{total_unmethylated_CHH_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);	
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq '.') {}
						elsif (lc$methylation_calls[$index] eq 'u'){}
						else{
							die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
						}	
					}
				}
				elsif ($strand eq '-') {
					for my $index (0..$#methylation_calls) {

						if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
							# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
							my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
							$cigar_offset += $cigar_mod;
							$pos_offset += $pos_mod;
						}

						### Returning as soon as the methylation calls start overlapping
						if ($start-$index+$pos_offset <= $end_read_1) {
							return;
						}

						if ($methylation_calls[$index] eq 'X') {
							$counting{total_meCHG_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'x') {
							$counting{total_unmethylated_CHG_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'Z') {
							$counting{total_meCpG_count}++;
							print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'z') {
							$counting{total_unmethylated_CpG_count}++;
							print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{un}++;
							}		
						}
						elsif ($methylation_calls[$index] eq 'H') {
							$counting{total_meCHH_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'h') {
							$counting{total_unmethylated_CHH_count}++;
							print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{un}++;
							}		
						}
						elsif ($methylation_calls[$index] eq '.') {}
						elsif (lc$methylation_calls[$index] eq 'u'){}
						else{
							die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
						}
					}
				}
				else {
					die "The strand orientation was neither + nor -: '$strand'/n";
				}
			}
		}
		### this is the paired-end procedure allowing overlaps and using every single C position
		### Still within the 2-CONTEXT ONLY optional section
		else {
			### single-file CpG and non-CpG context output
			if ($full) {
				if ($strand eq '+') {
					for my $index (0..$#methylation_calls) {

						if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
							my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
							# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
							$cigar_offset += $cigar_mod;
							$pos_offset += $pos_mod;
						}

						if ($methylation_calls[$index] eq 'X') {
							$counting{total_meCHG_count}++;
							print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'x') {
							$counting{total_unmethylated_CHG_count}++;
							print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'Z') {
							$counting{total_meCpG_count}++;
							print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'z') {
							$counting{total_unmethylated_CpG_count}++;
							print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CpG}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CpG}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'H') {
							$counting{total_meCHH_count}++;
							print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{meth}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{meth}++;
							}
						}
						elsif ($methylation_calls[$index] eq 'h') {
							$counting{total_unmethylated_CHH_count}++;
							print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
							if ($read_identity == 1){
								$mbias_1{CHH}->{$index+1}->{un}++;
							}
							else{
								$mbias_2{CHH}->{$index+1}->{un}++;
							}
						}
						elsif ($methylation_calls[$index] eq '.') {}
						elsif (lc$methylation_calls[$index] eq 'u'){}
						else{
							die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
						}
					}
				}
				elsif ($strand eq '-') {
					for my $index (0..$#methylation_calls) {

					if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
					  # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
					  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
					  $cigar_offset += $cigar_mod;
					  $pos_offset += $pos_mod;
					}

					if ($methylation_calls[$index] eq 'X') {
					  $counting{total_meCHG_count}++;
					  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					   if ($read_identity == 1){
					$mbias_1{CHG}->{$index+1}->{meth}++;
					  }
					  else{
					$mbias_2{CHG}->{$index+1}->{meth}++;
					  }
					}
					elsif ($methylation_calls[$index] eq 'x') {
					  $counting{total_unmethylated_CHG_count}++;
					  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					  if ($read_identity == 1){
					$mbias_1{CHG}->{$index+1}->{un}++;
					  }
					  else{
					$mbias_2{CHG}->{$index+1}->{un}++;
					  }
					}
					elsif ($methylation_calls[$index] eq 'Z') {
					  $counting{total_meCpG_count}++;
					  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					  if ($read_identity == 1){
					$mbias_1{CpG}->{$index+1}->{meth}++;
					  }
					  else{
					$mbias_2{CpG}->{$index+1}->{meth}++;
					  }
					}
					elsif ($methylation_calls[$index] eq 'z') {
					  $counting{total_unmethylated_CpG_count}++;
					  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					  if ($read_identity == 1){
					$mbias_1{CpG}->{$index+1}->{un}++;
					  }
					  else{
					$mbias_2{CpG}->{$index+1}->{un}++;
					  }
					}
					elsif ($methylation_calls[$index] eq 'H') {
					  $counting{total_meCHH_count}++;
					  print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					  if ($read_identity == 1){
					$mbias_1{CHH}->{$index+1}->{meth}++;
					  }
					  else{
					$mbias_2{CHH}->{$index+1}->{meth}++;
					  }
					}
					elsif ($methylation_calls[$index] eq 'h') {
					  $counting{total_unmethylated_CHH_count}++;
					  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					  if ($read_identity == 1){
					$mbias_1{CHH}->{$index+1}->{un}++;
					  }
					  else{
					$mbias_2{CHH}->{$index+1}->{un}++;
					  }
					}
					elsif ($methylation_calls[$index] eq '.') {}
					elsif (lc$methylation_calls[$index] eq 'u'){}
					else{
					  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
					}
				  }
				} else {
				  die "The strand orientation as neither + nor -: '$strand'\n";
				}
				  }

				  ### strand-specific methylation output
				  ### still within the 2-CONTEXT optional section
      else {
	if ($strand eq '+') {
	  for my $index (0..$#methylation_calls) {

	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
	      $cigar_offset += $cigar_mod;
	      $pos_offset += $pos_mod;
	    }

	    if ($methylation_calls[$index] eq 'X') {
	      $counting{total_meCHG_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	       if ($read_identity == 1){
		$mbias_1{CHG}->{$index+1}->{meth}++;
	      }
	      else{
		$mbias_2{CHG}->{$index+1}->{meth}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'x') {
	      $counting{total_unmethylated_CHG_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHG}->{$index+1}->{un}++;
	      }
	      else{
		$mbias_2{CHG}->{$index+1}->{un}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'Z') {
	      $counting{total_meCpG_count}++;
	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CpG}->{$index+1}->{meth}++;
	      }
	      else{
		$mbias_2{CpG}->{$index+1}->{meth}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'z') {
	      $counting{total_unmethylated_CpG_count}++;
	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CpG}->{$index+1}->{un}++;
	      }
	      else{
		$mbias_2{CpG}->{$index+1}->{un}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'H') {
	      $counting{total_meCHH_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHH}->{$index+1}->{meth}++;
	      }
	      else{
		$mbias_2{CHH}->{$index+1}->{meth}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'h') {
	      $counting{total_unmethylated_CHH_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHH}->{$index+1}->{un}++;
	      }
	      else{
		$mbias_2{CHH}->{$index+1}->{un}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq '.') {}
	    elsif (lc$methylation_calls[$index] eq 'u'){}
	    else{
	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	    }
	  }
	} elsif ($strand eq '-') {
	  for my $index (0..$#methylation_calls) {

	    if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	      # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
	      my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	      $cigar_offset += $cigar_mod;
	      $pos_offset += $pos_mod;
	    }

	    if ($methylation_calls[$index] eq 'X') {
	      $counting{total_meCHG_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHG}->{$index+1}->{meth}++;
	      }
	      else{
		$mbias_2{CHG}->{$index+1}->{meth}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'x') {
	      $counting{total_unmethylated_CHG_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHG}->{$index+1}->{un}++;
	      }
	      else{
		$mbias_2{CHG}->{$index+1}->{un}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'Z') {
	      $counting{total_meCpG_count}++;
	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CpG}->{$index+1}->{meth}++;
	      }
	      else{
		$mbias_2{CpG}->{$index+1}->{meth}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'z') {
	      $counting{total_unmethylated_CpG_count}++;
	      print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CpG}->{$index+1}->{un}++;
	      }
	      else{
		$mbias_2{CpG}->{$index+1}->{un}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'H') {
	      $counting{total_meCHH_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHH}->{$index+1}->{meth}++;
	      }
	      else{
		$mbias_2{CHH}->{$index+1}->{meth}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq 'h') {
	      $counting{total_unmethylated_CHH_count}++;
	      print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	      if ($read_identity == 1){
		$mbias_1{CHH}->{$index+1}->{un}++;
	      }
	      else{
		$mbias_2{CHH}->{$index+1}->{un}++;
	      }
	    }
	    elsif ($methylation_calls[$index] eq '.') {}
	    elsif (lc$methylation_calls[$index] eq 'u'){}
	    else{
	      die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
						}
					}
				} 			
				else {
					die "The strand orientation as neither + nor -: '$strand'\n";
				}
			}
		}
	}

	############################################
	### THIS IS THE DEFAULT 3-CONTEXT OUTPUT ###
	############################################

	elsif ($no_overlap) {
    ### single-file CpG, CHG and CHH context output
    if ($full) {
      if ($strand eq '+') {
	for my $index (0..$#methylation_calls) {
	
	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
	    $cigar_offset += $cigar_mod;
	    $pos_offset += $pos_mod;
	  }

	  ### Returning as soon as the methylation calls start overlapping
	  if ($start+$index+$pos_offset >= $end_read_1) {
	    return;
	  }
	
	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  } 
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }
	}
      } elsif ($strand eq '-') {
	for my $index (0..$#methylation_calls) {

	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	    $cigar_offset += $cigar_mod;
	    $pos_offset += $pos_mod;
	  }

	  ### Returning as soon as the methylation calls start overlapping
	  if ($start-$index+$pos_offset <= $end_read_1) {
	    return;
	  }
	
	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }
	}
      } else {
	die "The strand orientation as neither + nor -: '$strand'\n";
      }
    }

    ### strand-specific methylation output
    else {
      if ($strand eq '+') {
	for my $index (0..$#methylation_calls) {

	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
	    $cigar_offset += $cigar_mod;
	    $pos_offset += $pos_mod;
	  }

	  ### Returning as soon as the methylation calls start overlapping
	  if ($start+$index+$pos_offset >= $end_read_1) {
	    return;
	  }
	
	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }	
	}
      } elsif ($strand eq '-') {
	for my $index (0..$#methylation_calls) {

	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	    $cigar_offset += $cigar_mod;
	    $pos_offset += $pos_mod;
	  }

	  ### Returning as soon as the methylation calls start overlapping
	  if ($start-$index+$pos_offset <= $end_read_1) {
	    return;
	  }
	
	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }
	}
      } else {
	die "The strand orientation as neither + nor -: '$strand'\n";
      }
    }
	}

	### this is the paired-end procedure allowing overlaps and using every single C position
	else {
	
		
		### single-file CpG, CHG and CHH context output
		if ($full) {
			if ($strand eq '+') {
				for my $index (0..$#methylation_calls) {

					if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
						my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
						# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
						$cigar_offset += $cigar_mod;
						$pos_offset += $pos_mod;
					}

	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }
	}
      } elsif ($strand eq '-') {
	for my $index (0..$#methylation_calls) {

	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	    $cigar_offset += $cigar_mod;
	    $pos_offset += $pos_mod;
	  }

	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }
	}
      } else {
	die "The strand orientation as neither + nor -: '$strand'\n";
      }
    }

    ### strand-specific methylation output
    else {
		
		if ($strand eq '+') {
			for my $index (0..$#methylation_calls) {

				if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
					my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
						# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
						$cigar_offset += $cigar_mod;
						$pos_offset += $pos_mod;
					}

			if ($methylation_calls[$index] eq 'X') {
			$counting{total_meCHG_count}++;
			print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
			if ($read_identity == 1){
			  $mbias_1{CHG}->{$index+1}->{meth}++;
			}
			else{
			  $mbias_2{CHG}->{$index+1}->{meth}++;
			}
		  }
		  elsif ($methylation_calls[$index] eq 'x') {
			$counting{total_unmethylated_CHG_count}++;
			print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
			if ($read_identity == 1){
			  $mbias_1{CHG}->{$index+1}->{un}++;
			}
			else{
			  $mbias_2{CHG}->{$index+1}->{un}++;
			}
		  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq '.') {}
	  elsif (lc$methylation_calls[$index] eq 'u'){}
	  else{
	    die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	  }
	}
      } elsif ($strand eq '-') {
	for my $index (0..$#methylation_calls) {

	  if ($cigar and @comp_cigar){ # only needed for SAM reads with InDels
	    # print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
	    my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	    $cigar_offset += $cigar_mod;
	    $pos_offset += $pos_mod;
	  }

	  if ($methylation_calls[$index] eq 'X') {
	    $counting{total_meCHG_count}++;
	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'x') {
	    $counting{total_unmethylated_CHG_count}++;
	    print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'Z') {
	    $counting{total_meCpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'z') {
	    $counting{total_unmethylated_CpG_count}++;
	    print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CpG}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CpG}->{$index+1}->{un}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'H') {
	    $counting{total_meCHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{meth}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{meth}++;
	    }
	  }
	  elsif ($methylation_calls[$index] eq 'h') {
	    $counting{total_unmethylated_CHH_count}++;
	    print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	    if ($read_identity == 1){
	      $mbias_1{CHH}->{$index+1}->{un}++;
	    }
	    else{
	      $mbias_2{CHH}->{$index+1}->{un}++;
	    }
	  }
					elsif ($methylation_calls[$index] eq '.') {}
					elsif (lc$methylation_calls[$index] eq 'u'){}
					else{
						die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
					}
				}
			} 
			else {
				die "The strand orientation as neither + nor -: '$strand'\n";
			}
		}
	}
}

sub check_cigar_string {
	
	my ($index,$cigar_offset,$pos_offset,$strand,$comp_cigar) = @_;
	# print "Strand: $strand\n";
	# print "$index\t$cigar_offset\t$pos_offset\t$strand\t@$comp_cigar\n"; sleep(1);
	my ($new_cigar_offset,$new_pos_offset) = (0,0);

	if ($strand eq '+') {
		#  print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";

		if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
			#  warn "position needs no adjustment\n";
		}
		elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
			$new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
			# warn "adjusted genomic position by -1 bp (insertion)\n";
		}
		elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'S'){ # soft-clipped position in the read sequence
			$new_pos_offset -= 1; # we need to subtract the length of soft-clipped bases from the genomic position as they would be added otherwise
			# warn "adjusted genomic position by -1 bp (soft-clip)\n";
		}
		elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D' or @$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'N'){ 
			# deletion or skipped region in the read sequence 
			$new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
			$new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
			# warn "adjusted genomic position by +1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";

			while ( ($index + $cigar_offset + $new_cigar_offset)  < (scalar @$comp_cigar) ){	
				if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
					#  warn "position needs no adjustment\n";
					last;
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
					$new_pos_offset -= 1; # we need to subtract the length of inserted bases from the genomic position
					# warn "adjusted genomic position by another -1 bp (insertion)\n";
					last;
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'S'){
					$new_pos_offset -= 1; # we need to subtract the length of soft-clipped bases from the genomic position
					# warn "adjusted genomic position by another -1 bp (soft-clip)\n";
					last;
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
					$new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
					$new_pos_offset += 1; # we need to add the length of deleted bases to get the genomic position
					# warn "adjusted genomic position by another +1 bp (deletion)\n";
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'N'){ # skipped region in the read sequence (splicing)
					$new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
					$new_pos_offset += 1; # we need to add the length of skipped bases to get the genomic position
					# warn "adjusted genomic position by another +1 bp (skipped base (N))\n";
				}
				else{
					die "The CIGAR string contained undefined operations in addition to 'M', 'I', 'D', 'S' or 'N': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
				}
			}
		}
		else{
			die "The CIGAR string contained undefined operations in addition to 'M', 'I','N', 'S' and 'D': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
		}
	}
	elsif ($strand eq '-') {
		# print "### $strand strand @$comp_cigar[$index + $cigar_offset]\t";

		if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
			# warn "position needs no adjustment\n";
		}
		elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){ # insertion in the read sequence
			$new_pos_offset += 1; # we need to add the length of inserted bases to the genomic position
			# warn "adjusted genomic position by +1 bp (insertion)\n";
		}
		elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'S'){ # soft-clipped positions in the read sequence
			$new_pos_offset += 1; # we need to add the length of soft-clipped bases to the genomic position
			# warn "adjusted genomic position by +1 bp (soft-clip)\n";
		}
		elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D' or @$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'N'){ 
			# deletion or skipped region in the read sequence
			$new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
			$new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
			# warn "adjusted genomic position by -1 bp (deletion). Now looping through the CIGAR string until we hit another M or I\n";

			while ( ($index + $cigar_offset + $new_cigar_offset)  < (scalar @$comp_cigar) ){	
				if (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'M'){ # sequence position matches the genomic position
					# warn "Found new 'M' operation; position needs no adjustment\n";
					last;	
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'I'){
					$new_pos_offset += 1; # we need to subtract the length of inserted bases from the genomic position
					# warn "Found new 'I' operation; adjusted genomic position by another +1 bp (insertion)\n";
					last;
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'S'){
					$new_pos_offset += 1; # we need to subtract the length of soft-clipped bases from the genomic position
					# warn "Found new 'S' operation; adjusted genomic position by another +1 bp (soft-clip)\n";
					last;
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'D'){ # deletion in the read sequence
					$new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
					$new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
					# warn "adjusted genomic position by another -1 bp (deletion)\n";
				}
				elsif (@$comp_cigar[$index + $cigar_offset + $new_cigar_offset] eq 'N'){ # skipped region in the read sequence
					$new_cigar_offset += 1; # the composite cigar string does no longer match the methylation call index
					$new_pos_offset -= 1; # we need to subtract the length of deleted bases to get the genomic position
					# warn "adjusted genomic position by another -1 bp (skipped base N)\n";
				}
				else{
					die "The CIGAR string contained undefined operations in addition to 'M', 'I', 'D', 'S' or 'N': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
				}
			}
		}
		else{
			die "The CIGAR string contained undefined operations in addition to 'M', 'I', 'D', 'S' or 'N': '@$comp_cigar[$index + $cigar_offset + $new_cigar_offset]'\n";
		}
	}
	# print "Returning new cigar offset: $new_cigar_offset\tnew pos offset: $new_pos_offset\n";
	return ($new_cigar_offset,$new_pos_offset);
}


sub print_individual_C_methylation_states_single_end{
	my ($meth_call,$chrom,$start,$id,$strand,$filehandle_index,$cigar) = @_;
	my $end = $start; # determinining end further down
	
	my @methylation_calls = split(//,$meth_call);
	# print join ("\n",$meth_call,$chrom,$start,$id,$strand,$filehandle_index,$cigar),"\n";
	#################################################################
	### . for bases not involving cytosines                       ###
	### X for methylated C in CHG context (was protected)         ###
	### x for not methylated C in CHG context (was converted)     ###
	### H for methylated C in CHH context (was protected)         ###
	### h for not methylated C in CHH context (was converted)     ###
	### Z for methylated C in CpG context (was protected)         ###
	### z for not methylated C in CpG context (was converted)     ###
	#################################################################

	my $methyl_CHG_count = 0;
	my $methyl_CHH_count = 0;
	my $methyl_CpG_count = 0;
	my $unmethylated_CHG_count = 0;
	my $unmethylated_CHH_count = 0;
	my $unmethylated_CpG_count = 0;

	my $pos_offset = 0;   # this is only relevant for SAM reads with insertions or deletions
	my $cigar_offset = 0; # again, this is only relevant for SAM reads containing indels

	my @comp_cigar;

	if ($cigar){ # parsing CIGAR string
		
		### Checking whether the CIGAR string is a linear genomic match or whether it requires indel processing
		if ($cigar =~ /^(\d+)M$/){
			# warn "See!? I told you so! $cigar\n";
			# sleep(1);
			if ($yacht and $strand eq '+') { # adding to $end only for forward reads
				$end += $1 - 1;
			}
		}
		else{
	  		my @len;
			my @ops;
			@len = split (/\D+/,$cigar); # storing the length per operation
			@ops = split (/\d+/,$cigar); # storing the operation
			shift @ops; # remove the empty first element
			# die "CIGAR string contained a non-matching number of lengths and operations: id: $id\nmeth call: $meth_call\nCIGAR: $cigar\n".join(" ",@len)."\n".join(" ",@ops)."\n" unless (scalar @len == scalar @ops);
			die "CIGAR string contained a non-matching number of lengths and operations\n" unless (scalar @len == scalar @ops);
			
			foreach my $index (0..$#len){
				foreach (1..$len[$index]){
					# print  "$ops[$index]"; sleep(1);
					push @comp_cigar, $ops[$index];
				}
			}
	  
			#  Determining end of the read for optional fragment length output
			if ($yacht and $strand eq '+'){ # adding to $end only for forward reads
				my $MDN_count = 0;
				foreach (@comp_cigar){
					++$MDN_count if (($_ eq 'M') or ($_ eq 'D') or ($_ eq 'N') ); # Matching bases, deletions or splicing affect the genomic position of the 3' ends of reads, insertions don't
				}
				$end += $MDN_count - 1;
			}
		  
			#warn "\nDetected CIGAR string: $cigar\n";
			#warn "Length of methylation call: ",length $meth_call,"\n";
			#warn "number of operations: ",scalar @ops,"\n";
			#warn "number of length digits: ",scalar @len,"\n\n";
			#print @comp_cigar,"\n";
			#print "$meth_call\n\n";
			#sleep (1);
		}
	}

	### adjusting the start position for all reads mapping to the reverse strand
	if ($strand eq '-') {

		if (@comp_cigar){ # only needed for SAM reads with InDels
			@comp_cigar  = reverse@comp_cigar; # the CIGAR string needs to be reversed for all reads aligning to the reverse strand, too
			# print @comp_cigar,"\n";
		}

		unless ($ignore){  ### if --ignore was specified the start position has already been corrected

			if ($cigar){ ### SAM format
				if ($cigar =~ /^(\d+)M$/){ # linear match
					$start += $1 - 1;
				}
				else{ # InDel read
					my $MDN_count = 0;
					foreach (@comp_cigar){
						++$MDN_count if ( ($_ eq 'M') or ($_ eq 'D') or ($_ eq 'N') ); # Matching bases, deletions or spliced reads affect the 
						# genomic position of the 3' ends of reads, insertions or soft-clips don't
					}
					$start += $MDN_count - 1;
				}
			}
			else{ ### vanilla format
				die "This used to be the vanilla format, but should not occur any more";#$start += length($meth_call)-1;
			}
		}
	}
  
	### THIS IS THE CpG and Non-CpG SECTION (OPTIONAL)

	### single-file CpG and other-context output
	if ($full and $merge_non_CpG) {
		# warn "$chrom\t$start\t$end\t$strand\t$cigar\n";
		if ($strand eq '+') {
			for my $index (0..$#methylation_calls) {
	      
				if ($cigar and @comp_cigar){ # only needed for SAM alignments with InDels
					my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
					# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition+index: ",$start+$index,"\t";
					$cigar_offset += $cigar_mod;
					$pos_offset += $pos_mod;
				}	
			  
				my $line;
	      
				### methylated Cs (any context) will receive a forward (+) orientation
				### not methylated Cs (any context) will receive a reverse (-) orientation
				if ($methylation_calls[$index] eq 'X') {
					$counting{total_meCHG_count}++;
					if ($yacht){
						$line = join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{other_context}} $line unless($mbias_only);
					}
					# print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'x') {
					$counting{total_unmethylated_CHG_count}++;
					if ($yacht){
						$line = join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{other_context}} $line unless($mbias_only);
					}
					#  print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq 'Z') {
					$counting{total_meCpG_count}++;
					if ($yacht){
						$line = join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{CpG_context}} $line unless($mbias_only);
					}
					# print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CpG}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'z') {
					$counting{total_unmethylated_CpG_count}++;
					if ($yacht){
						$line = join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{CpG_context}} $line unless($mbias_only);
					}
					# print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CpG}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq 'H') {
					$counting{total_meCHH_count}++;
					if ($yacht){
						$line = join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{other_context}} $line unless($mbias_only);
					}
					# print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHH}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'h') {
					$counting{total_unmethylated_CHH_count}++;
					if ($yacht){
						$line = join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{other_context}} $line unless($mbias_only);
					}
					# print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHH}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq '.') {}
				elsif (lc$methylation_calls[$index] eq 'u'){}
				else{
					die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
				}
			}
		}
		elsif ($strand eq '-') {	

			for my $index (0..$#methylation_calls) {
				### methylated Cs (any context) will receive a forward (+) orientation
				### not methylated Cs (any context) will receive a reverse (-) orientation

				if ($cigar and @comp_cigar){ # only needed for SAM entries with InDels
					# print "index: $index\tmethylation_call: $methylation_calls[$index]\tposition-index: ",$start-$index,"\t";
					my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
					$cigar_offset += $cigar_mod;
					$pos_offset += $pos_mod;
				}

				my $line;

				if ($methylation_calls[$index] eq 'X') {
					$counting{total_meCHG_count}++;
					if ($yacht){
						$line = join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
						print {$fhs{any_context}} $line;
					}
					else{
						$line = join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n";
						print {$fhs{other_context}} $line unless($mbias_only);
					}
		   
					#print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{meth}++;
				}
			elsif ($methylation_calls[$index] eq 'x') {
				$counting{total_unmethylated_CHG_count}++;
				if ($yacht){
				$line = join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
				print {$fhs{any_context}} $line;
				}
				else{
				$line = join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n";
				print {$fhs{other_context}} $line unless($mbias_only);
				}
				#print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
				$mbias_1{CHG}->{$index+1}->{un}++;
			}
			elsif ($methylation_calls[$index] eq 'Z') {
				$counting{total_meCpG_count}++;
				if ($yacht){
				$line = join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
				print {$fhs{any_context}} $line;
				}
				else{
				$line = join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n";
				print {$fhs{CpG_context}} $line unless($mbias_only);	  
				}
				#print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
				$mbias_1{CpG}->{$index+1}->{meth}++;
			}
			elsif ($methylation_calls[$index] eq 'z') {
				$counting{total_unmethylated_CpG_count}++;
				if ($yacht){
				$line = join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
				print {$fhs{any_context}} $line;
				}
				else{
				$line = join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n"; 
				print {$fhs{CpG_context}} $line unless($mbias_only);
				}
				#print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
				$mbias_1{CpG}->{$index+1}->{un}++;
			}
			elsif ($methylation_calls[$index] eq 'H') {
				$counting{total_meCHH_count}++;
				if ($yacht){
				$line = join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
				print {$fhs{any_context}} $line;
				}
				else{
				$line = join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n";
				print {$fhs{other_context}} $line unless($mbias_only);
				}
				#print {$fhs{other_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
				$mbias_1{CHH}->{$index+1}->{meth}++;
			}
			elsif ($methylation_calls[$index] eq 'h') {
				$counting{total_unmethylated_CHH_count}++;
				if ($yacht){
				$line = join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index],$start,$end,$strand)."\n";
				print {$fhs{any_context}} $line;
				}
				else{
				$line = join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n";
				print {$fhs{other_context}} $line unless($mbias_only);
				}
				#print {$fhs{other_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
				$mbias_1{CHH}->{$index+1}->{un}++;
			}
			elsif ($methylation_calls[$index] eq '.'){}
			elsif (lc$methylation_calls[$index] eq 'u'){}
			else{
			  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
			}
			  }
		}
			else {
				die "The strand information was neither + nor -: $strand\n";
		}
	}

	### strand-specific methylation output
	elsif ($merge_non_CpG) {
		if ($strand eq '+') {
		for my $index (0..$#methylation_calls) {
	### methylated Cs (any context) will receive a forward (+) orientation
	### not methylated Cs (any context) will receive a reverse (-) orientation

	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	  $cigar_offset += $cigar_mod;
	  $pos_offset += $pos_mod;
	}

	if ($methylation_calls[$index] eq 'X') {
	  $counting{total_meCHG_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'x') {
	  $counting{total_unmethylated_CHG_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'Z') {
	  $counting{total_meCpG_count}++;
	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'z') {
	  $counting{total_unmethylated_CpG_count}++;
	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'H') {
	  $counting{total_meCHH_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'h') {
	  $counting{total_unmethylated_CHH_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq '.') {}
	elsif (lc$methylation_calls[$index] eq 'u'){}
	else{
	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	}
      }
    }
    elsif ($strand eq '-') {

      for my $index (0..$#methylation_calls) {
	### methylated Cs (any context) will receive a forward (+) orientation
	### not methylated Cs (any context) will receive a reverse (-) orientation

    	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	  $cigar_offset += $cigar_mod;
	  $pos_offset += $pos_mod;
	}

	if ($methylation_calls[$index] eq 'X') {
	  $counting{total_meCHG_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'x') {
	  $counting{total_unmethylated_CHG_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'Z') {
	  $counting{total_meCpG_count}++;
	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'z') {
	  $counting{total_unmethylated_CpG_count}++;
	  print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'H') {
	  $counting{total_meCHH_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'h') {
	  $counting{total_unmethylated_CHH_count}++;
	  print {$fhs{$filehandle_index}->{other_c}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq '.') {}
	elsif (lc$methylation_calls[$index] eq 'u'){}
	else{
	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	}
      }
    }
    else {
      die "The strand information was neither + nor -: $strand\n";
    }
  }

	### THIS IS THE 3-CONTEXT (CpG, CHG and CHH) DEFAULT SECTION
	elsif ($full) {
			
		if ($strand eq '+') {
      for my $index (0..$#methylation_calls) {
	### methylated Cs (any context) will receive a forward (+) orientation
	### not methylated Cs (any context) will receive a reverse (-) orientation

	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	  $cigar_offset += $cigar_mod;
	  $pos_offset += $pos_mod;
	}
	
	if ($methylation_calls[$index] eq 'X') {
	  $counting{total_meCHG_count}++;
	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'x') {
	  $counting{total_unmethylated_CHG_count}++;
	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'Z') {
	  $counting{total_meCpG_count}++;
	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'z') {
	  $counting{total_unmethylated_CpG_count}++;
	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'H') {
	  $counting{total_meCHH_count}++;
	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'h') {
	  $counting{total_unmethylated_CHH_count}++;
	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq '.') {}
	elsif (lc$methylation_calls[$index] eq 'u'){}
	else{
	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n" unless($mbias_only);
	}
      }
    }
    elsif ($strand eq '-') {

      for my $index (0..$#methylation_calls) {
	### methylated Cs (any context) will receive a forward (+) orientation
	### not methylated Cs (any context) will receive a reverse (-) orientation

	if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
	  my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
	  $cigar_offset += $cigar_mod;
	  $pos_offset += $pos_mod;
	}
	
	if ($methylation_calls[$index] eq 'X') {
	  $counting{total_meCHG_count}++;
	  print {$fhs{CHG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'x') {
	  $counting{total_unmethylated_CHG_count}++;
	  print {$fhs{CHG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'Z') {
	  $counting{total_meCpG_count}++;
	  print {$fhs{CpG_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'z') {
	  $counting{total_unmethylated_CpG_count}++;
	  print {$fhs{CpG_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CpG}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq 'H') {
	  $counting{total_meCHH_count}++;
	  print {$fhs{CHH_context}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{meth}++;
	}
	elsif ($methylation_calls[$index] eq 'h') {
	  $counting{total_unmethylated_CHH_count}++;
	  print {$fhs{CHH_context}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
	  $mbias_1{CHH}->{$index+1}->{un}++;
	}
	elsif ($methylation_calls[$index] eq '.') {}
	elsif (lc$methylation_calls[$index] eq 'u'){}
	else{
	  die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
	}
      }
    }
    else {
      die "The read had a strand orientation which was neither + nor -: $strand\n";
    }
  }

	### strand-specific methylation output
	else {
		
		if ($strand eq '+') {
			for my $index (0..$#methylation_calls) {
				### methylated Cs (any context) will receive a forward (+) orientation
				### not methylated Cs (any context) will receive a reverse (-) orientation

				if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
					my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
					$cigar_offset += $cigar_mod;
					$pos_offset += $pos_mod;
				}

				if ($methylation_calls[$index] eq 'X') {
					$counting{total_meCHG_count}++;
					print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'x') {
					$counting{total_unmethylated_CHG_count}++;
					print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq 'Z') {
					$counting{total_meCpG_count}++;
					print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CpG}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'z') {
					$counting{total_unmethylated_CpG_count}++;
					print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CpG}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq 'H') {
					$counting{total_meCHH_count}++;
					print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHH}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'h') {
					$counting{total_unmethylated_CHH_count}++;
					print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start+$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHH}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq '.') {}
				elsif (lc$methylation_calls[$index] eq 'u'){}
				else{
					die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
				}
			}
		}
		elsif ($strand eq '-') {

			for my $index (0..$#methylation_calls) {
				### methylated Cs (any context) will receive a forward (+) orientation
				### not methylated Cs (any context) will receive a reverse (-) orientation

				if ($cigar and @comp_cigar){ # only needed for SAM reads with Indels
					my ($cigar_mod,$pos_mod) = check_cigar_string($index,$cigar_offset,$pos_offset,$strand,\@comp_cigar);	
					$cigar_offset += $cigar_mod;
					$pos_offset += $pos_mod;
				}

				if ($methylation_calls[$index] eq 'X') {
					$counting{total_meCHG_count}++;
					print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'x') {
					$counting{total_unmethylated_CHG_count}++;
					print {$fhs{$filehandle_index}->{CHG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHG}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq 'Z') {
					$counting{total_meCpG_count}++;
					print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CpG}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'z') {
					$counting{total_unmethylated_CpG_count}++;
					print {$fhs{$filehandle_index}->{CpG}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CpG}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq 'H') {
					$counting{total_meCHH_count}++;
					print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'+',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHH}->{$index+1}->{meth}++;
				}
				elsif ($methylation_calls[$index] eq 'h') {
					$counting{total_unmethylated_CHH_count}++;
					print {$fhs{$filehandle_index}->{CHH}} join ("\t",$id,'-',$chrom,$start-$index+$pos_offset,$methylation_calls[$index])."\n" unless($mbias_only);
					$mbias_1{CHH}->{$index+1}->{un}++;
				}
				elsif ($methylation_calls[$index] eq '.') {}
				elsif (lc$methylation_calls[$index] eq 'u'){}
				else{
					die "The methylation call string contained the following unrecognised character: $methylation_calls[$index]\n";
				}
			}
		}
		else {
		  die "The strand information was neither + nor -: $strand\n";
		}	
	}
}

sub open_output_filehandles{

	my $filename = shift;

	my $output_filename = (split (/\//,$filename))[-1];
	my $report_filename = $output_filename;

	### OPENING OUTPUT-FILEHANDLES
	if ($report) {
		$report_filename =~ s/\.sam$//;
		$report_filename =~ s/\.bam$//;
		$report_filename =~ s/\.cram$//;
		$report_filename =~ s/\.txt$//;
		$report_filename =~ s/$/_splitting_report.txt/;
		$report_filename = $output_dir . $report_filename;
		open (REPORT,'>',$report_filename) or die "Failed to write to file $report_filename $!\n";
	}

	if ($report) {

		print REPORT "$output_filename\n\n";
		print REPORT "Parameters used to extract methylation information:\n";
		print REPORT "Bismark Extractor Version: $version\n";
      
		if ($paired) {
			print REPORT "Bismark result file: paired-end (SAM format)\n"; # default
		}

		if ($single) {
			print REPORT "Bismark result file: single-end (SAM format)\n"; # default
		}
		if ($single){
			if ($ignore) {
				print REPORT "Ignoring first $ignore bp\n";
			}
			if ($ignore_3prime) {
				print REPORT "Ignoring last $ignore_3prime bp\n";
			}
		}	
		else{ # paired-end
			if ($ignore) {
				print REPORT "Ignoring first $ignore bp of Read 1\n";
			}
			if ($ignore_r2){
				print REPORT "Ignoring first $ignore_r2 bp of Read 2\n";
			}
	  
			if ($ignore_3prime) {
				print REPORT "Ignoring last $ignore_3prime bp of Read 1\n";
			}
			if ($ignore_3prime_r2){
				print REPORT "Ignoring last $ignore_3prime_r2 bp of Read 2\n";
			}
		}	
      
		if ($full) {
			print REPORT "Output specified: comprehensive\n";
		} 
		else {
			print REPORT "Output specified: strand-specific (default)\n";
		}
      
		if ($no_overlap) {
			print REPORT "No overlapping methylation calls specified\n";
		}
		if ($genomic_fasta) {
			print REPORT "Genomic equivalent sequences will be printed out in FastA format\n";
		}
		if ($merge_non_CpG) {
			print REPORT "Methylation in CHG and CHH context will be merged into \"non-CpG context\" output\n";
		}
      
		print REPORT "\n";
	}

  #####   open (OUT,"| gzip -c - > $output_dir$outfile") or die "Failed to write to $outfile: $!\n";

  ### --YACHT: Yet Another Context Hunting Tool: Cytosine output irrespective of cytosine context. THIS SECTION IS OPTIONAL
  ### and meant to be fed into the NOMe_filtering script afterwards
  ### if --comprehensive AND --merge_non_CpG AND --yacht were specified we only write out a single "any_C_context" result file
  if ($yacht){
      my $cytosine_output = $output_filename;
      ### C in CpG context
      $cytosine_output =~ s/^/any_C_context_/;
      $cytosine_output =~ s/sam$/txt/;
      $cytosine_output =~ s/bam$/txt/;
      $cytosine_output =~ s/cram$/txt/;
      $cytosine_output =~ s/$/.txt/ unless ($cytosine_output =~ /\.txt$/);
      $cytosine_output = $output_dir . $cytosine_output;
      
      if ($gzip){
	  $cytosine_output .= '.gz';
	  open ($fhs{any_context},"| gzip -c - > $cytosine_output") or die "Failed to write to $cytosine_output $! \n";
      }
      else{ ### disclaimer: I am aware of "The Useless Use of Cat Awards", but I saw no other option...
	  open ($fhs{any_context},"| cat > $cytosine_output") or die "Failed to write to $cytosine_output $! \n";
      }
      
      warn "Writing result file containing methylation information for C in any context to $cytosine_output\n";
      push @sorting_files,$cytosine_output;
      
      unless ($no_header) {
	  print {$fhs{any_context}} "Bismark methylation extractor version $version\n";
      }
  }
  ### CpG-context and non-CpG context. THIS SECTION IS OPTIONAL
  ### if --comprehensive AND --merge_non_CpG was specified we are only writing out one CpG-context and one Any-Other-context result file
  elsif ($full and $merge_non_CpG) {
      my $cpg_output = my $other_c_output = $output_filename;
      ### C in CpG context
      $cpg_output =~ s/^/CpG_context_/;
      $cpg_output =~ s/sam$/txt/;
      $cpg_output =~ s/bam$/txt/;
      $cpg_output =~ s/cram$/txt/;
      $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
      $cpg_output = $output_dir . $cpg_output;

      if ($gzip){
	  $cpg_output .= '.gz';
	  open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
      }
      else{ ### disclaimer: I am aware of "The Useless Use of Cat Awards", but I saw no other option...
	  open ($fhs{CpG_context},"| cat > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
	  # open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n" unless($mbias_only);
      push @sorting_files,$cpg_output;
      
      unless ($no_header) {
	  print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }

      ### C in any other context than CpG
      $other_c_output =~ s/^/Non_CpG_context_/;
      $other_c_output =~ s/sam$/txt/;
      $other_c_output =~ s/bam$/txt/;
      $other_c_output =~ s/cram$/txt/;
      $other_c_output =~ s/$/.txt/ unless ($other_c_output =~ /\.txt$/);
      $other_c_output = $output_dir . $other_c_output;
      
      if ($gzip){
	  $other_c_output .= '.gz';
	  open ($fhs{other_context},"| gzip -c - > $other_c_output") or die "Failed to write to $other_c_output $! \n" unless($mbias_only);
      }
      else{
	  open ($fhs{other_context},"| cat > $other_c_output") or die "Failed to write to $other_c_output $!\n" unless($mbias_only);
	  #   open ($fhs{other_context},'>',$other_c_output) or die "Failed to write to $other_c_output $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in any other context to $other_c_output\n" unless($mbias_only);
      push @sorting_files,$other_c_output;
      
      
      unless ($no_header) {
	  print {$fhs{other_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
  }
  ### if only --merge_non_CpG was specified we will write out 8 different output files, depending on where the (first) unique best alignment has been found
  elsif ($merge_non_CpG) {
      
      my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
      
      ### For cytosines in CpG context
      $cpg_ot =~ s/^/CpG_OT_/;
      $cpg_ot =~ s/sam$/txt/;
      $cpg_ot =~ s/bam$/txt/;
      $cpg_ot =~ s/cram$/txt/;
      $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
      $cpg_ot = $output_dir . $cpg_ot;
      
      if ($gzip){
	  $cpg_ot .= '.gz';
	  open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{0}->{CpG},"| cat > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
	  # open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n" unless($mbias_only);
      push @sorting_files,$cpg_ot;
      
      unless($no_header){
	  print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $cpg_ctot =~ s/^/CpG_CTOT_/;
      $cpg_ctot =~ s/sam$/txt/;
      $cpg_ctot =~ s/bam$/txt/; 
      $cpg_ctot =~ s/cram$/txt/;
      $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
      $cpg_ctot = $output_dir . $cpg_ctot;
      
      if ($gzip){
	  $cpg_ctot .= '.gz';
	  open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{1}->{CpG},"| cat > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
	  # open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n" unless($mbias_only);
      push @sorting_files,$cpg_ctot;
      
      unless($no_header){
	  print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $cpg_ctob =~ s/^/CpG_CTOB_/;
      $cpg_ctob =~ s/sam$/txt/;
      $cpg_ctob =~ s/bam$/txt/;
      $cpg_ctob =~ s/cram$/txt/;
      $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
      $cpg_ctob = $output_dir . $cpg_ctob;
      
      if ($gzip){
	  $cpg_ctob .= '.gz';
	  open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{2}->{CpG},"| cat > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
	  # open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n" unless($mbias_only);
      push @sorting_files,$cpg_ctob;
      
      unless($no_header){
	  print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $cpg_ob =~ s/^/CpG_OB_/;
      $cpg_ob =~ s/sam$/txt/;
      $cpg_ob =~ s/bam$/txt/;
      $cpg_ob =~ s/cram$/txt/;
      $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
      $cpg_ob = $output_dir . $cpg_ob;
      
      if ($gzip){
	  $cpg_ob .= '.gz';
	  open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{3}->{CpG},"| cat > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
	  # open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n" unless($mbias_only);
      push @sorting_files,$cpg_ob;
      
      unless($no_header){
	  print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      ### For cytosines in Non-CpG (CC, CT or CA) context
      my $other_c_ot = my $other_c_ctot = my $other_c_ctob = my $other_c_ob = $output_filename;
      
      $other_c_ot =~ s/^/Non_CpG_OT_/;
      $other_c_ot =~ s/sam$/txt/;
      $other_c_ot =~ s/bam$/txt/;
      $other_c_ot =~ s/cram$/txt/;
      $other_c_ot =~ s/$/.txt/ unless ($other_c_ot =~ /\.txt$/);
      $other_c_ot = $output_dir . $other_c_ot;
      
      if ($gzip){
	  $other_c_ot .= '.gz';
	  open ($fhs{0}->{other_c},"| gzip -c - > $other_c_ot") or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{0}->{other_c},"| cat > $other_c_ot") or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
	  # open ($fhs{0}->{other_c},'>',$other_c_ot) or die "Failed to write to $other_c_ot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in any other context from the original top strand to $other_c_ot\n" unless($mbias_only);
      push @sorting_files,$other_c_ot;
      
      unless($no_header){
	  print {$fhs{0}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $other_c_ctot =~ s/^/Non_CpG_CTOT_/;
      $other_c_ctot =~ s/sam$/txt/;
      $other_c_ctot =~ s/bam$/txt/;
      $other_c_ctot =~ s/cram$/txt/;
      $other_c_ctot =~ s/$/.txt/ unless ($other_c_ctot =~ /\.txt$/);
      $other_c_ctot = $output_dir . $other_c_ctot;
      
      if ($gzip){
	  $other_c_ctot .= '.gz';
	  open ($fhs{1}->{other_c},"| gzip -c - > $other_c_ctot") or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{1}->{other_c},"|  cat > $other_c_ctot") or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
	  # open ($fhs{1}->{other_c},'>',$other_c_ctot) or die "Failed to write to $other_c_ctot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in any other context from the complementary to original top strand to $other_c_ctot\n" unless($mbias_only);
      push @sorting_files,$other_c_ctot;
      
      unless($no_header){
	  print {$fhs{1}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $other_c_ctob =~ s/^/Non_CpG_CTOB_/;
      $other_c_ctob =~ s/sam$/txt/;
      $other_c_ctob =~ s/bam$/txt/;
      $other_c_ctob =~ s/cram$/txt/;
      $other_c_ctob =~ s/$/.txt/ unless ($other_c_ctob =~ /\.txt$/);
      $other_c_ctob = $output_dir . $other_c_ctob;
      
      if ($gzip){
	  $other_c_ctob .= '.gz';
	  open ($fhs{2}->{other_c},"| gzip -c - > $other_c_ctob") or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{2}->{other_c},"| cat > $other_c_ctob") or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
	  # open ($fhs{2}->{other_c},'>',$other_c_ctob) or die "Failed to write to $other_c_ctob $!\n" unless($mbias_only);
    }
      
      warn "Writing result file containing methylation information for C in any other context from the complementary to original bottom strand to $other_c_ctob\n" unless($mbias_only);
      push @sorting_files,$other_c_ctob;
      
      unless($no_header){
	  print {$fhs{2}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $other_c_ob =~ s/^/Non_CpG_OB_/;
      $other_c_ob =~ s/sam$/txt/;
      $other_c_ob =~ s/sam$/txt/;
      $other_c_ob =~ s/cram$/txt/;
      $other_c_ob =~ s/$/.txt/ unless ($other_c_ob =~ /\.txt$/);
      $other_c_ob = $output_dir . $other_c_ob;
      
      if ($gzip){
	  $other_c_ob .= '.gz';
	  open ($fhs{3}->{other_c},"| gzip -c - > $other_c_ob") or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{3}->{other_c},"| cat > $other_c_ob") or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
	  # open ($fhs{3}->{other_c},'>',$other_c_ob) or die "Failed to write to $other_c_ob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in any other context from the original bottom strand to $other_c_ob\n\n" unless($mbias_only);
      push @sorting_files,$other_c_ob;
      
      unless($no_header){
	  print {$fhs{3}->{other_c}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
  }

  ### THIS SECTION IS THE DEFAULT (CpG, CHG and CHH context)
  
  ### if --comprehensive was specified we are only writing one file per context
  elsif ($full) {
      my $cpg_output = my $chg_output =  my $chh_output = $output_filename;
      ### C in CpG context
      $cpg_output =~ s/^/CpG_context_/;
      $cpg_output =~ s/sam$/txt/;
      $cpg_output =~ s/bam$/txt/;
      $cpg_output =~ s/cram$/txt/;
      $cpg_output =~ s/$/.txt/ unless ($cpg_output =~ /\.txt$/);
      $cpg_output = $output_dir . $cpg_output;
      
      if ($gzip){
	  $cpg_output .= '.gz';
	  open ($fhs{CpG_context},"| gzip -c - > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
      }
      else{
	  open ($fhs{CpG_context},"| cat > $cpg_output") or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
	  # open ($fhs{CpG_context},'>',$cpg_output) or die "Failed to write to $cpg_output $! \n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context to $cpg_output\n" unless($mbias_only);
      push @sorting_files,$cpg_output;
      
      unless($no_header){
	  print {$fhs{CpG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      ### C in CHG context
      $chg_output =~ s/^/CHG_context_/;
      $chg_output =~ s/sam$/txt/;
      $chg_output =~ s/bam$/txt/;
      $chg_output =~ s/cram$/txt/;
      $chg_output =~ s/$/.txt/ unless ($chg_output =~ /\.txt$/);
      $chg_output = $output_dir . $chg_output;
      
      if ($gzip){
	  $chg_output .= '.gz';
	  open ($fhs{CHG_context},"| gzip -c - > $chg_output") or die "Failed to write to $chg_output $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{CHG_context},"| cat > $chg_output") or die "Failed to write to $chg_output $!\n" unless($mbias_only);
	  # open ($fhs{CHG_context},'>',$chg_output) or die "Failed to write to $chg_output $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHG context to $chg_output\n" unless($mbias_only);
      push @sorting_files,$chg_output;
      
      unless($no_header){
	  print {$fhs{CHG_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      ### C in CHH context
      $chh_output =~ s/^/CHH_context_/;
      $chh_output =~ s/sam$/txt/;
      $chh_output =~ s/bam$/txt/;
      $chh_output =~ s/cram$/txt/;
      $chh_output =~ s/$/.txt/ unless ($chh_output =~ /\.txt$/);
      $chh_output = $output_dir . $chh_output;
      
      if ($gzip){
	  $chh_output .= '.gz';
	  open ($fhs{CHH_context},"| gzip -c - > $chh_output") or die "Failed to write to $chh_output $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{CHH_context},"| cat > $chh_output") or die "Failed to write to $chh_output $!\n" unless($mbias_only);
	  # open ($fhs{CHH_context},'>',$chh_output) or die "Failed to write to $chh_output $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHH context to $chh_output\n" unless($mbias_only);
      push @sorting_files, $chh_output;
      
      unless($no_header){
	  print {$fhs{CHH_context}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
  }
  ### else we will write out 12 different output files, depending on where the (first) unique best alignment was found
  else {
      my $cpg_ot = my $cpg_ctot = my $cpg_ctob = my $cpg_ob = $output_filename;
      
      ### For cytosines in CpG context
      $cpg_ot =~ s/^/CpG_OT_/;
      $cpg_ot =~ s/sam$/txt/;
      $cpg_ot =~ s/bam$/txt/;
      $cpg_ot =~ s/cram$/txt/;
      $cpg_ot =~ s/$/.txt/ unless ($cpg_ot =~ /\.txt$/);
      $cpg_ot = $output_dir . $cpg_ot;
      
      if ($gzip){
	  $cpg_ot .= '.gz';
	  open ($fhs{0}->{CpG},"| gzip -c - > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{0}->{CpG},"| cat > $cpg_ot") or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
	  # open ($fhs{0}->{CpG},'>',$cpg_ot) or die "Failed to write to $cpg_ot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the original top strand to $cpg_ot\n" unless($mbias_only);
      push @sorting_files,$cpg_ot;
      
      unless($no_header){
	  print {$fhs{0}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $cpg_ctot =~ s/^/CpG_CTOT_/;
      $cpg_ctot =~ s/sam$/txt/;
      $cpg_ctot =~ s/bam$/txt/;
      $cpg_ctot =~ s/cram$/txt/;
      $cpg_ctot =~ s/$/.txt/ unless ($cpg_ctot =~ /\.txt$/);
      $cpg_ctot = $output_dir . $cpg_ctot;
      
      if ($gzip){
	  $cpg_ctot .= '.gz';
	  open ($fhs{1}->{CpG},"| gzip -c - > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{1}->{CpG},"| cat > $cpg_ctot") or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
	  # open ($fhs{1}->{CpG},'>',$cpg_ctot) or die "Failed to write to $cpg_ctot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the complementary to original top strand to $cpg_ctot\n" unless($mbias_only);
      push @sorting_files,$cpg_ctot;
      
      unless($no_header){
	  print {$fhs{1}->{CpG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $cpg_ctob =~ s/^/CpG_CTOB_/;
      $cpg_ctob =~ s/sam$/txt/;
      $cpg_ctob =~ s/bam$/txt/;
      $cpg_ctob =~ s/cram$/txt/;
      $cpg_ctob =~ s/$/.txt/ unless ($cpg_ctob =~ /\.txt$/);
      $cpg_ctob = $output_dir . $cpg_ctob;
      
      if ($gzip){
	  $cpg_ctob .= '.gz';
	  open ($fhs{2}->{CpG},"| gzip -c - > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{2}->{CpG},"| cat > $cpg_ctob") or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
	  # open ($fhs{2}->{CpG},'>',$cpg_ctob) or die "Failed to write to $cpg_ctob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the complementary to original bottom strand to $cpg_ctob\n" unless($mbias_only);
      push @sorting_files,$cpg_ctob;
      
      unless($no_header){
	  print {$fhs{2}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $cpg_ob =~ s/^/CpG_OB_/;
      $cpg_ob =~ s/sam$/txt/;
      $cpg_ob =~ s/bam$/txt/;
      $cpg_ob =~ s/cram$/txt/;
      $cpg_ob =~ s/$/.txt/ unless ($cpg_ob =~ /\.txt$/);
      $cpg_ob = $output_dir . $cpg_ob;
      
      if ($gzip){
	  $cpg_ob .= '.gz';
	  open ($fhs{3}->{CpG},"| gzip -c - > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{3}->{CpG},"| cat > $cpg_ob") or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
	  # open ($fhs{3}->{CpG},'>',$cpg_ob) or die "Failed to write to $cpg_ob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CpG context from the original bottom strand to $cpg_ob\n\n" unless($mbias_only);
      push @sorting_files,$cpg_ob;
      
      unless($no_header){
	  print {$fhs{3}->{CpG}}  "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      ### For cytosines in CHG context
      my $chg_ot = my $chg_ctot = my $chg_ctob = my $chg_ob = $output_filename;
      
      $chg_ot =~ s/^/CHG_OT_/;
      $chg_ot =~ s/sam$/txt/;
      $chg_ot =~ s/bam$/txt/;
      $chg_ot =~ s/cram$/txt/;
      $chg_ot =~ s/$/.txt/ unless ($chg_ot =~ /\.txt$/);
      $chg_ot = $output_dir . $chg_ot;
      
      if ($gzip){
	  $chg_ot .= '.gz';
	  open ($fhs{0}->{CHG},"| gzip -c - > $chg_ot") or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{0}->{CHG},"| cat > $chg_ot") or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
	  # open ($fhs{0}->{CHG},'>',$chg_ot) or die "Failed to write to $chg_ot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHG context from the original top strand to $chg_ot\n" unless($mbias_only);
      push @sorting_files,$chg_ot;
      
      unless($no_header){
	  print {$fhs{0}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $chg_ctot =~ s/^/CHG_CTOT_/;
      $chg_ctot =~ s/sam$/txt/;
      $chg_ctot =~ s/bam$/txt/;
      $chg_ctot =~ s/cram$/txt/;
      $chg_ctot =~ s/$/.txt/ unless ($chg_ctot =~ /\.txt$/);
      $chg_ctot = $output_dir . $chg_ctot;
      
      if ($gzip){
	  $chg_ctot .= '.gz';
	  open ($fhs{1}->{CHG},"| gzip -c - > $chg_ctot") or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{1}->{CHG},"| cat > $chg_ctot") or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
	  # open ($fhs{1}->{CHG},'>',$chg_ctot) or die "Failed to write to $chg_ctot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHG context from the complementary to original top strand to $chg_ctot\n" unless($mbias_only);
      push @sorting_files,$chg_ctot;
      
      unless($no_header){
	  print {$fhs{1}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $chg_ctob =~ s/^/CHG_CTOB_/;
      $chg_ctob =~ s/sam$/txt/;
      $chg_ctob =~ s/bam$/txt/;
      $chg_ctob =~ s/cram$/txt/;
      $chg_ctob =~ s/$/.txt/ unless ($chg_ctob =~ /\.txt$/);
      $chg_ctob = $output_dir . $chg_ctob;
      
      if ($gzip){
	  $chg_ctob .= '.gz';
	  open ($fhs{2}->{CHG},"| gzip -c - > $chg_ctob") or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{2}->{CHG},"| cat > $chg_ctob") or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
	  # open ($fhs{2}->{CHG},'>',$chg_ctob) or die "Failed to write to $chg_ctob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHG context from the complementary to original bottom strand to $chg_ctob\n" unless($mbias_only);
      push @sorting_files,$chg_ctob;
      
      unless($no_header){
	  print {$fhs{2}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $chg_ob =~ s/^/CHG_OB_/;
      $chg_ob =~ s/sam$/txt/;
      $chg_ob =~ s/bam$/txt/;
      $chg_ob =~ s/cram$/txt/;
      $chg_ob =~ s/$/.txt/ unless ($chg_ob =~ /\.txt$/);
      $chg_ob = $output_dir . $chg_ob;
      
      if ($gzip){
	  $chg_ob .= '.gz';
	  open ($fhs{3}->{CHG},"| gzip -c - > $chg_ob") or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{3}->{CHG},"| cat > $chg_ob") or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
	  # open ($fhs{3}->{CHG},'>',$chg_ob) or die "Failed to write to $chg_ob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHG context from the original bottom strand to $chg_ob\n\n" unless($mbias_only);
      push @sorting_files,$chg_ob;
      
      unless($no_header){
	  print {$fhs{3}->{CHG}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      ### For cytosines in CHH context
      my $chh_ot = my $chh_ctot = my $chh_ctob = my $chh_ob = $output_filename;
      
      $chh_ot =~ s/^/CHH_OT_/;
      $chh_ot =~ s/sam$/txt/;
      $chh_ot =~ s/bam$/txt/;
      $chh_ot =~ s/cram$/txt/;
      $chh_ot =~ s/$/.txt/ unless ($chh_ot =~ /\.txt$/);
      $chh_ot = $output_dir . $chh_ot;
      
      if ($gzip){
	  $chh_ot .= '.gz';
	  open ($fhs{0}->{CHH},"| gzip -c - > $chh_ot") or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{0}->{CHH},"| cat > $chh_ot") or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
	  # open ($fhs{0}->{CHH},'>',$chh_ot) or die "Failed to write to $chh_ot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHH context from the original top strand to $chh_ot\n" unless($mbias_only);
      push @sorting_files,$chh_ot;
      
      unless($no_header){
	  print {$fhs{0}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $chh_ctot =~ s/^/CHH_CTOT_/;
      $chh_ctot =~ s/sam$/txt/;
      $chh_ctot =~ s/bam$/txt/;
      $chh_ctot =~ s/cram$/txt/;
      $chh_ctot =~ s/$/.txt/ unless ($chh_ctot =~ /\.txt$/);
      $chh_ctot = $output_dir . $chh_ctot;
      
      if ($gzip){
	  $chh_ctot .= '.gz';
	  open ($fhs{1}->{CHH},"| gzip -c - > $chh_ctot") or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{1}->{CHH},"| cat > $chh_ctot") or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
	  # open ($fhs{1}->{CHH},'>',$chh_ctot) or die "Failed to write to $chh_ctot $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHH context from the complementary to original top strand to $chh_ctot\n" unless($mbias_only);
      push @sorting_files,$chh_ctot;
      
      unless($no_header){
	  print {$fhs{1}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $chh_ctob =~ s/^/CHH_CTOB_/;
      $chh_ctob =~ s/sam$/txt/;
      $chh_ctob =~ s/bam$/txt/;
      $chh_ctob =~ s/cram$/txt/;
      $chh_ctob =~ s/$/.txt/ unless ($chh_ctob =~ /\.txt$/);
      $chh_ctob = $output_dir . $chh_ctob;
      
      if ($gzip){
	  $chh_ctob .= '.gz';
	  open ($fhs{2}->{CHH},"| gzip -c - > $chh_ctob") or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{2}->{CHH},"| cat > $chh_ctob") or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
	  # open ($fhs{2}->{CHH},'>',$chh_ctob) or die "Failed to write to $chh_ctob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHH context from the complementary to original bottom strand to $chh_ctob\n" unless($mbias_only);
      push @sorting_files,$chh_ctob;
      
      unless($no_header){
	  print {$fhs{2}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
      
      $chh_ob =~ s/^/CHH_OB_/;
      $chh_ob =~ s/sam$/txt/;
      $chh_ob =~ s/bam$/txt/;
      $chh_ob =~ s/cram$/txt/;
      $chh_ob =~ s/$/.txt/ unless ($chh_ob =~ /\.txt$/);
      $chh_ob = $output_dir . $chh_ob;
      
      if ($gzip){
	  $chh_ob .= '.gz';
	  open ($fhs{3}->{CHH},"| gzip -c - > $chh_ob") or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
      }
      else{
	  open ($fhs{3}->{CHH},"| cat > $chh_ob") or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
	  # open ($fhs{3}->{CHH},'>',$chh_ob) or die "Failed to write to $chh_ob $!\n" unless($mbias_only);
      }
      
      warn "Writing result file containing methylation information for C in CHH context from the original bottom strand to $chh_ob\n\n" unless($mbias_only);
      push @sorting_files,$chh_ob;
      
      unless($no_header){
	  print {$fhs{3}->{CHH}} "Bismark methylation extractor version $version\n" unless($mbias_only);
      }
  }
  return $report_filename;
}

sub get_chromosome_sizes{

	my ($file) = @_;
    warn "Recording chromosome sizes of file >>$file<< from the BAM header...\n";
    
    open (CHRSIZES,"$samtools_path view -H $file |") or die "Unable to read from BAM file $file: $!"; 

    my $count = 0;
    while (<CHRSIZES>){
		chomp;
		++$count;
		if ($_ =~ /^\@SQ/){ 
			#	print "$_\n";
			$_ =~ /SN:(.*)\s+LN:(\d+)/;
			my $chr = $1;
			my $length = $2;
			# making the chromosome names UCSC ready
			if ($chr eq 'MT'){
				$chr = 'chrM';
			}
			unless ($chr =~ /^chr/){
				$chr = "chr$chr";
			}
			# print "$chr $length\n";
			$chromosome_sizes{$chr} = $length;
		}
    }

    close CHRSIZES; #  or warn "$!\n";
	return;
}

sub isBam{

	my $filename = shift;

	# reading the first line of the input file to see if it is a BAM file in disguise (i.e. a BAM file that does not end in *.bam which may be produced by Galaxy)
	open (DISGUISE,"gunzip -c $filename |") or die "Failed to open filehandle DISGUISE for $filename\n\n";

	### when BAM files read through a gunzip -c stream they start with BAM...
	my $bam_in_disguise = <DISGUISE>;
	# warn "BAM in disguise: $bam_in_disguise\n\n";

	if ($bam_in_disguise){
		if ($bam_in_disguise =~ /^BAM/){
			# close (DISGUISE) or warn "Had trouble closing filehandle BAM in disguise: $!\n";
			return 1;
		}
		else{
			# close (DISGUISE) or warn "Had trouble closing filehandle BAM in disguise: $!\n";
			return 0;
		}
	}
	else{
		# close (DISGUISE) or warn "Had trouble closing filehandle BAM in disguise: $!\n";
		return 0;
	}
}


sub print_helpfile{

 print << 'HOW_TO';


DESCRIPTION

The following is a brief description of all options to control the Bismark
methylation extractor. The script reads in a bisulfite read alignment results file 
produced by the Bismark bisulfite mapper (in BAM/CRAM/SAM format) and extracts the
methylation information for individual cytosines. This information is found in the
methylation call field which can contain the following characters:

       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
       ~~~   X   for methylated C in CHG context                      ~~~
       ~~~   x   for not methylated C CHG                             ~~~
       ~~~   H   for methylated C in CHH context                      ~~~
       ~~~   h   for not methylated C in CHH context                  ~~~
       ~~~   Z   for methylated C in CpG context                      ~~~
       ~~~   z   for not methylated C in CpG context                  ~~~
       ~~~   U   for methylated C in Unknown context (CN or CHN       ~~~
       ~~~   u   for not methylated C in Unknown context (CN or CHN)  ~~~
       ~~~   .   for any bases not involving cytosines                ~~~
       ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The methylation extractor outputs result files for cytosines in CpG, CHG and CHH
context (this distinction is actually already made in Bismark itself). As the methylation
information for every C analysed can produce files which easily have tens or even hundreds of
millions of lines, file sizes can become very large and more difficult to handle. The C
methylation info additionally splits cytosine methylation calls up into one of the four possible
strands a given bisulfite read aligned against:

             OT      original top strand
             CTOT    complementary to original top strand

             OB      original bottom strand
             CTOB    complementary to original bottom strand

Thus, by default twelve individual output files are being generated per input file (unless
--comprehensive is specified, see below). The output files can be imported into a genome
viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact
unless the bisulfite reads were generated preserving directionality it doesn't make any
sense to look at the data in a strand-specific manner). Strand-specific output files can
optionally be skipped, in which case only three output files for CpG, CHG or CHH context
will be generated. For both the strand-specific and comprehensive outputs there is also
the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.


The output files are in the following format (tab delimited):

<sequence_id>     <strand>      <chromosome>     <position>     <methylation call>


USAGE: bismark_methylation_extractor [options] <filenames>


ARGUMENTS:
==========

<filenames>              A space-separated list of Bismark result files in SAM format from
                         which methylation information is extracted for every cytosine in
                         the reads.
OPTIONS:

-s/--single-end          Input file(s) are Bismark result file(s) generated from single-end
                         read data. If neither -s nor -p is set the type of experiment will
                         be determined automatically.

-p/--paired-end          Input file(s) are Bismark result file(s) generated from paired-end
                         read data. If neither -s nor -p is set the type of experiment will
                         be determined automatically.

--no_overlap             For paired-end reads it is theoretically possible that read_1 and
                         read_2 overlap. This option avoids scoring overlapping methylation
                         calls twice (only methylation calls of read 1 are used for in the process
                         since read 1 has historically higher quality basecalls than read 2).
                         Whilst this option removes a bias towards more methylation calls
                         in the center of sequenced fragments it may de facto remove a sizable
                         proportion of the data. This option is on by default for paired-end data
                         but can be disabled using --include_overlap. Default: ON.

--include_overlap        For paired-end data all methylation calls will be extracted irrespective of
                         of whether they overlap or not. Default: OFF.

--ignore <int>           Ignore the first <int> bp from the 5' end of Read 1 (or single-end alignment
                         files) when processing the methylation call string. This can remove e.g. a
                         restriction enzyme site at the start of each read or any other source of
                         bias (such as PBAT-Seq data).

--ignore_r2 <int>        Ignore the first <int> bp from the 5' end of Read 2 of paired-end sequencing
                         results only. Since the first couple of bases in Read 2 of BS-Seq experiments
                         show a severe bias towards non-methylation as a result of end-repairing
                         sonicated fragments with unmethylated cytosines (see M-bias plot), it is
                         recommended that the first couple of bp of Read 2 are removed before
                         starting downstream analysis. Please see the section on M-bias plots in the
                         Bismark User Guide for more details.

--ignore_3prime <int>    Ignore the last <int> bp from the 3' end of Read 1 (or single-end alignment
                         files) when processing the methylation call string. This can remove unwanted
                         biases from the end of reads.

--ignore_3prime_r2 <int> Ignore the last <int> bp from the 3' end of Read 2 of paired-end sequencing
                         results only. This can remove unwanted biases from the end of reads.

--comprehensive          Specifying this option will merge all four possible strand-specific
                         methylation info into context-dependent output files. The default

                         contexts are:
                          - CpG context
                          - CHG context
                          - CHH context

--merge_non_CpG          This will produce two output files (in --comprehensive mode) or eight
                         strand-specific output files (default) for Cs in
                          - CpG context
                          - non-CpG context

--no_header              Suppresses the Bismark version header line in all output files for more convenient
                         batch processing.

-o/--output DIR          Allows specification of a different output directory (absolute or relative
                         path). If not specified explicitly, the output will be written to the current directory.

--samtools_path          The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified
                         explicitly if Samtools is in the PATH already.

--gzip                   The methylation extractor files (CpG_OT_..., CpG_OB_... etc) will be written out in
                         a GZIP compressed form to save disk space. This option is also passed on to the genome-wide
                         cytosine report. BedGraph and coverage files will be written out as .gz by default.

--version                Displays version information.

-h/--help                Displays this help file and exits.

--mbias_only             The methylation extractor will read the entire file but only output the M-bias table and plots as 
                         well as a report (optional) and then quit. Default: OFF.

--mbias_off              The methylation extractor will process the entire file as usual but doesn't write out any M-bias report.
                         Only recommended for users who deliberately want to keep an earlier version of the M-bias report. 
                         Default: OFF.

--parallel <int>         May also be --multicore <int>. Sets the number of cores to be used for the methylation extraction process.
                         If system resources are plentiful this is a viable option to speed up the extraction process (we observed a
                         near linear speed increase for up to 10 cores used). Please note that a typical process of extracting a BAM file
                         and writing out '.gz' output streams will in fact use ~3 cores per value of --parallel <int>
                         specified (1 for the methylation extractor itself, 1 for a Samtools stream, 1 for GZIP stream), so
                         --parallel 10 is likely to use around 30 cores of system resources. This option has no bearing
                         on the bismark2bedGraph or genome-wide cytosine report processes.

--yacht                  This option (Yet Another Context Hunting Tool) writes out additional information about the read a methylation
                         call belongs to, and its output is meant to be fed into the NOMe_filtering script. This option writes out
                         a single 'any_C_context' file that contains all methylation calls for a read consecutively. Its intended use
                         is single-cell NOMe-Seq data, and thus this option works only in single-end mode (paired-end reads often suffer
                         from chimaera problems...)

                         --yacht will add three additional columns to the standard methylation call files:
                                    <read start>  <read end>  <read orientation>
                         For forward reads (+ orientation) the start position is the left-most position wheras for reverse reads
                         (- orientation) it is the rightmost position.


bedGraph specific options:
==========================

--bedGraph               After finishing the methylation extraction, the methylation output is written into a
                         sorted bedGraph file that reports the position of a given cytosine and its methylation 
                         state (in %, see details below). The methylation extractor output is temporarily split up into
                         temporary files, one per chromosome (written into the current directory or folder
                         specified with -o/--output); these temp files are then used for sorting and deleted
                         afterwards. By default, only cytosines in CpG context will be sorted. The option
                         '--CX_context' may be used to report all cytosines irrespective of sequence context
                         (this will take MUCH longer!). The default folder for temporary files during the sorting
                         process is the output directory. The bedGraph conversion step is performed by the external
                         module 'bismark2bedGraph'; this script needs to reside in the same folder as the 
                         bismark_methylation_extractor itself.

--zero_based             Write out an additional coverage file (ending in .zero.cov) that uses 0-based genomic start
                         and 1-based genomic end coordinates (zero-based, half-open), like used in the bedGraph file,
                         instead of using 1-based coordinates throughout. Default: OFF.


--cutoff [threshold]     The minimum number of times any methylation state (methylated or unmethylated) has to be seen
                         for a nucleotide before its methylation percentage is reported. Default: 1.

--remove_spaces          Replaces whitespaces in the sequence ID field with underscores to allow sorting.

--CX/--CX_context        The sorted bedGraph output file contains information on every single cytosine that was covered
                         in the experiment irrespective of its sequence context. This applies to both forward and
                         reverse strands. Please be aware that this option may generate large temporary and output files
                         and may take a long time to sort (up to many hours). Default: OFF.
                         (i.e. Default = CpG context only).

--buffer_size <string>   This allows you to specify the main memory sort buffer when sorting the methylation information.
                         Either specify a percentage of physical memory by appending % (e.g. --buffer_size 50%) or
                         a multiple of 1024 bytes, e.g. 'K' multiplies by 1024, 'M' by 1048576 and so on for 'T' etc. 
                         (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line.
                         Defaults to 2G.

--scaffolds/--gazillion  Users working with unfinished genomes sporting tens or even hundreds of thousands of
                         scaffolds/contigs/chromosomes frequently encountered errors with pre-sorting reads to
                         individual chromosome files. These errors were caused by the operating system's limit
                         of the number of filehandle that can be written to at any one time (typically 1024; to
                         find out this limit on Linux, type: ulimit -a).
                         To bypass the limitation of open filehandles, the option --scaffolds does not pre-sort
                         methylation calls into individual chromosome files. Instead, all input files are
                         temporarily merged into a single file (unless there is only a single file), and this
                         file will then be sorted by both chromosome AND position using the Unix sort command.
                         Please be aware that this option might take a looooong time to complete, depending on
                         the size of the input files, and the memory you allocate to this process (see --buffer_size).
                         Nevertheless, it seems to be working.

--ample_memory           Using this option will not sort chromosomal positions using the UNIX 'sort' command, but will
                         instead use two arrays to sort methylated and unmethylated calls. This may result in a faster
                         sorting process of very large files, but this comes at the cost of a larger memory footprint
                         (two arrays of the length of the largest human chromosome 1 (~250M bp) consume around 16GB
                         of RAM). Due to overheads in creating and looping through these arrays it seems that it will
                         actually be *slower* for small files (few million alignments), and we are currently testing at
                         which point it is advisable to use this option. Note that --ample_memory is not compatible
                         with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).

--ucsc                   Writes out an additional bedGraph file (ending in '_UCSC.bedGraph.gz') that is compatible with the
                    	 UCSC genome browser. If alignments were carried out against an Ensembl version of the genome, this
                         step will prefix chromosome names with 'chr', and rename the mitochondrial chromosome from 'MT' to
                    	 'chrM'. In addition, this wite out a tab delimited file ending in '.chromosome_sizes.txt' to enable
						 tools such as bedGraphToBigWig.


Genome-wide cytosine methylation report specific options:
=========================================================

--cytosine_report        After the conversion to bedGraph has completed, the option '--cytosine_report' produces a
                         genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based
                         chromosome coordinates (zero-based start coords are optional) and reports CpG context only (all
                         cytosine context is optional). The output considers all Cs on both forward and reverse strands and
                         reports their position, strand, trinucleotide content and methylation state (counts are 0 if not
                         covered). The cytosine report conversion step is performed by the external module
                         'coverage2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor
                         itself.

--CX/--CX_context        The output file contains information on every single cytosine in the genome irrespective of
                         its context. This applies to both forward and reverse strands. Please be aware that this will
                         generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.
                         Default: OFF (i.e. Default = CpG context only).

--zero_based             Uses 0-based genomic coordinates instead of 1-based coordinates. Default: OFF.

--genome_folder <path>   Enter the genome folder you wish to use to extract sequences from (full path only). Accepted
                         formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.

--split_by_chromosome    Writes the output into individual files for each chromosome instead of a single output file. Files
                         will be named to include the input filename and the chromosome number.



OUTPUT:

The bismark_methylation_extractor output is in the form:
========================================================
<seq-ID>  <methylation state*>  <chromosome>  <start position (= end position)>  <methylation call>

* Methylated cytosines receive a '+' orientation,
* Unmethylated cytosines receive a '-' orientation.

The bismark_methylation_extractor output with --yacht (optional) specified is in the form:
==========================================================================================
<seq-ID>  <methylation state*>  <chromosome>  <start position (= end position)>  <methylation call>  <read start>  <read end>  <read orientation>

* Methylated cytosines receive a '+' orientation,
* Unmethylated cytosines receive a '-' orientation.


The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):
=========================================================================================================

track type=bedGraph (header line)

<chromosome>  <start position>  <end position>  <methylation percentage>


The coverage output looks like this (tab-delimited, 1-based genomic coords; zero-based half-open coordinates available with '--zero_based'):
============================================================================================================================================

<chromosome>  <start position>  <end position>  <methylation percentage>  <count methylated>  <count non-methylated>




The genome-wide cytosine methylation output file is tab-delimited in the following format:
==========================================================================================
<chromosome>  <position>  <strand>  <count methylated>  <count non-methylated>  <C-context>  <trinucleotide context>



This script was last modified on 30 October 2020.

HOW_TO
}
